xref: /linux/tools/testing/selftests/kvm/include/x86/processor.h (revision 11e8c7e9471cf8e6ae6ec7324a3174191cd965e3)
1 /* SPDX-License-Identifier: GPL-2.0-only */
2 /*
3  * Copyright (C) 2018, Google LLC.
4  */
5 
6 #ifndef SELFTEST_KVM_PROCESSOR_H
7 #define SELFTEST_KVM_PROCESSOR_H
8 
9 #include <assert.h>
10 #include <stdint.h>
11 #include <syscall.h>
12 
13 #include <asm/msr-index.h>
14 #include <asm/prctl.h>
15 
16 #include <linux/kvm_para.h>
17 #include <linux/stringify.h>
18 
19 #include "kvm_util.h"
20 #include "ucall_common.h"
21 
/* Host CPU vendor flags; only declared here, defined and set elsewhere. */
extern bool host_cpu_is_intel;
extern bool host_cpu_is_amd;
/* TSC frequency in kHz (per the name); udelay() below asserts it is non-zero. */
extern uint64_t guest_tsc_khz;

/*
 * Default value; a definition of MAX_NR_CPUID_ENTRIES made before this header
 * is included takes precedence.
 */
#ifndef MAX_NR_CPUID_ENTRIES
#define MAX_NR_CPUID_ENTRIES 100
#endif

/* NOTE(review): presumably a non-canonical 64-bit address for tests - confirm. */
#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull

/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"

/* Vector number 2. NOTE(review): presumably the NMI vector, per the name. */
#define NMI_VECTOR		0x02

/* Maps @vector to a string; implemented outside this header. */
const char *ex_str(int vector);

/* Bit 1 of EFLAGS. NOTE(review): presumably the always-set bit - confirm. */
#define X86_EFLAGS_FIXED	 (1u << 1)
40 
/*
 * CR4 bit masks, one macro per bit position, typed as unsigned long.
 * Bits 15 and 19 have no macro in this list.
 */
#define X86_CR4_VME		(1ul << 0)
#define X86_CR4_PVI		(1ul << 1)
#define X86_CR4_TSD		(1ul << 2)
#define X86_CR4_DE		(1ul << 3)
#define X86_CR4_PSE		(1ul << 4)
#define X86_CR4_PAE		(1ul << 5)
#define X86_CR4_MCE		(1ul << 6)
#define X86_CR4_PGE		(1ul << 7)
#define X86_CR4_PCE		(1ul << 8)
#define X86_CR4_OSFXSR		(1ul << 9)
#define X86_CR4_OSXMMEXCPT	(1ul << 10)
#define X86_CR4_UMIP		(1ul << 11)
#define X86_CR4_LA57		(1ul << 12)
#define X86_CR4_VMXE		(1ul << 13)
#define X86_CR4_SMXE		(1ul << 14)
#define X86_CR4_FSGSBASE	(1ul << 16)
#define X86_CR4_PCIDE		(1ul << 17)
#define X86_CR4_OSXSAVE		(1ul << 18)
#define X86_CR4_SMEP		(1ul << 20)
#define X86_CR4_SMAP		(1ul << 21)
#define X86_CR4_PKE		(1ul << 22)
62 
/*
 * Header that follows the 512-byte legacy area in struct xstate.  Packed:
 * two u64 bitmaps plus six reserved u64s, 64 bytes in total.
 */
struct xstate_header {
	u64				xstate_bv;
	u64				xcomp_bv;
	u64				reserved[6];
} __attribute__((packed));
68 
/*
 * XSAVE-style buffer layout: a 512-byte i387 region, the xstate_header, then
 * a zero-length trailing array marking where any further state begins.
 * The struct is packed and aligned to 64 bytes.
 */
struct xstate {
	u8				i387[512];
	struct xstate_header		header;
	u8				extended_state_area[0];
} __attribute__ ((packed, aligned (64)));
74 
/*
 * Single-bit 64-bit masks built with BIT_ULL().  Bits 13, 14 and 16 have no
 * macro in this list.
 */
#define XFEATURE_MASK_FP		BIT_ULL(0)
#define XFEATURE_MASK_SSE		BIT_ULL(1)
#define XFEATURE_MASK_YMM		BIT_ULL(2)
#define XFEATURE_MASK_BNDREGS		BIT_ULL(3)
#define XFEATURE_MASK_BNDCSR		BIT_ULL(4)
#define XFEATURE_MASK_OPMASK		BIT_ULL(5)
#define XFEATURE_MASK_ZMM_Hi256		BIT_ULL(6)
#define XFEATURE_MASK_Hi16_ZMM		BIT_ULL(7)
#define XFEATURE_MASK_PT		BIT_ULL(8)
#define XFEATURE_MASK_PKRU		BIT_ULL(9)
#define XFEATURE_MASK_PASID		BIT_ULL(10)
#define XFEATURE_MASK_CET_USER		BIT_ULL(11)
#define XFEATURE_MASK_CET_KERNEL	BIT_ULL(12)
#define XFEATURE_MASK_LBR		BIT_ULL(15)
#define XFEATURE_MASK_XTILE_CFG		BIT_ULL(17)
#define XFEATURE_MASK_XTILE_DATA	BIT_ULL(18)

/* Composite masks: the three AVX-512 components, and both XTILE components. */
#define XFEATURE_MASK_AVX512		(XFEATURE_MASK_OPMASK | \
					 XFEATURE_MASK_ZMM_Hi256 | \
					 XFEATURE_MASK_Hi16_ZMM)
#define XFEATURE_MASK_XTILE		(XFEATURE_MASK_XTILE_DATA | \
					 XFEATURE_MASK_XTILE_CFG)
97 
/*
 * Note, these are ordered alphabetically to match kvm_cpuid_entry2.  Eww.
 *
 * The values (0..3) are used in this header as indices into a four-element
 * array of CPUID output registers, see __this_cpu_has().
 */
enum cpuid_output_regs {
	KVM_CPUID_EAX,
	KVM_CPUID_EBX,
	KVM_CPUID_ECX,
	KVM_CPUID_EDX
};
105 
106 /*
107  * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
108  * passed by value with no overhead.
109  */
110 struct kvm_x86_cpu_feature {
111 	u32	function;
112 	u16	index;
113 	u8	reg;
114 	u8	bit;
115 };
116 #define	KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit)				\
117 ({										\
118 	struct kvm_x86_cpu_feature feature = {					\
119 		.function = fn,							\
120 		.index = idx,							\
121 		.reg = KVM_CPUID_##gpr,						\
122 		.bit = __bit,							\
123 	};									\
124 										\
125 	kvm_static_assert((fn & 0xc0000000) == 0 ||				\
126 			  (fn & 0xc0000000) == 0x40000000 ||			\
127 			  (fn & 0xc0000000) == 0x80000000 ||			\
128 			  (fn & 0xc0000000) == 0xc0000000);			\
129 	kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE));	\
130 	feature;								\
131 })
132 
/*
 * Basic Leafs, a.k.a. Intel defined
 *
 * Arguments are (function, index, register, bit), see KVM_X86_CPU_FEATURE().
 * Entries are grouped by leaf; a few (e.g. SHSTK, PKS) are not in
 * register/bit order.
 */
#define	X86_FEATURE_MWAIT		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
#define	X86_FEATURE_VMX			KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
#define	X86_FEATURE_SMX			KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
#define	X86_FEATURE_PDCM		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
#define	X86_FEATURE_PCID		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
#define X86_FEATURE_X2APIC		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
#define	X86_FEATURE_MOVBE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
#define	X86_FEATURE_TSC_DEADLINE_TIMER	KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
#define	X86_FEATURE_XSAVE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
#define	X86_FEATURE_OSXSAVE		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
#define	X86_FEATURE_RDRAND		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
#define	X86_FEATURE_HYPERVISOR		KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
#define X86_FEATURE_PAE			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
#define	X86_FEATURE_MCE			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
#define	X86_FEATURE_APIC		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
#define	X86_FEATURE_CLFLUSH		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
#define	X86_FEATURE_XMM			KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
#define	X86_FEATURE_XMM2		KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
#define	X86_FEATURE_FSGSBASE		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
#define	X86_FEATURE_TSC_ADJUST		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
#define	X86_FEATURE_SGX			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
#define	X86_FEATURE_HLE			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
#define	X86_FEATURE_SMEP	        KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
#define	X86_FEATURE_INVPCID		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
#define	X86_FEATURE_RTM			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
#define	X86_FEATURE_MPX			KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
#define	X86_FEATURE_SMAP		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
#define	X86_FEATURE_PCOMMIT		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
#define	X86_FEATURE_CLFLUSHOPT		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
#define	X86_FEATURE_CLWB		KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
#define	X86_FEATURE_UMIP		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
#define	X86_FEATURE_PKU			KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
#define	X86_FEATURE_OSPKE		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
#define	X86_FEATURE_LA57		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
#define	X86_FEATURE_RDPID		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
#define	X86_FEATURE_SGX_LC		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
#define	X86_FEATURE_SHSTK		KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
#define	X86_FEATURE_IBT			KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
#define	X86_FEATURE_AMX_TILE		KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
#define	X86_FEATURE_SPEC_CTRL		KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
#define	X86_FEATURE_ARCH_CAPABILITIES	KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
#define	X86_FEATURE_PKS			KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
#define	X86_FEATURE_XTILECFG		KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
#define	X86_FEATURE_XTILEDATA		KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
#define	X86_FEATURE_XSAVES		KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
#define	X86_FEATURE_XFD			KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
#define X86_FEATURE_XTILEDATA_XFD	KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)
183 
/*
 * Extended Leafs, a.k.a. AMD defined
 *
 * All functions here are in the 0x8000_0000 range; arguments are
 * (function, index, register, bit), see KVM_X86_CPU_FEATURE().
 */
#define	X86_FEATURE_SVM			KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
#define	X86_FEATURE_PERFCTR_CORE	KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
#define	X86_FEATURE_PERFCTR_NB		KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
#define	X86_FEATURE_PERFCTR_LLC		KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
#define	X86_FEATURE_NX			KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
#define	X86_FEATURE_GBPAGES		KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
#define	X86_FEATURE_RDTSCP		KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
#define	X86_FEATURE_LM			KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
#define	X86_FEATURE_INVTSC		KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
#define	X86_FEATURE_RDPRU		KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
#define	X86_FEATURE_AMD_IBPB		KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
#define	X86_FEATURE_NPT			KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
#define	X86_FEATURE_LBRV		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
#define	X86_FEATURE_NRIPS		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
#define X86_FEATURE_TSCRATEMSR          KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
#define X86_FEATURE_PAUSEFILTER         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD         KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
#define	X86_FEATURE_V_VMSAVE_VMLOAD	KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 15)
#define	X86_FEATURE_VGIF		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
#define X86_FEATURE_IDLE_HLT		KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV			KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES		KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
#define X86_FEATURE_SEV_SNP		KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
#define	X86_FEATURE_PERFMON_V2		KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
#define	X86_FEATURE_LBR_PMC_FREEZE	KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)
212 
/*
 * KVM defined paravirt features.
 *
 * All are bits of EAX in function 0x40000001, sub-leaf 0.
 */
#define X86_FEATURE_KVM_CLOCKSOURCE	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
#define X86_FEATURE_KVM_NOP_IO_DELAY	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
#define X86_FEATURE_KVM_MMU_OP		KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
#define X86_FEATURE_KVM_CLOCKSOURCE2	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
#define X86_FEATURE_KVM_ASYNC_PF	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
#define X86_FEATURE_KVM_STEAL_TIME	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
#define X86_FEATURE_KVM_PV_EOI		KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
#define X86_FEATURE_KVM_PV_UNHALT	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
/* Bit 8 apparently isn't used?!?! */
#define X86_FEATURE_KVM_PV_TLB_FLUSH	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
#define X86_FEATURE_KVM_PV_SEND_IPI	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
#define X86_FEATURE_KVM_POLL_CONTROL	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
#define X86_FEATURE_KVM_PV_SCHED_YIELD	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
#define X86_FEATURE_KVM_ASYNC_PF_INT	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
#define X86_FEATURE_KVM_MSI_EXT_DEST_ID	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
#define X86_FEATURE_KVM_MIGRATION_CONTROL	KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)
234 
235 /*
236  * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
237  * value/property as opposed to a single-bit feature.  Again, pack the info
238  * into a 64-bit value to pass by value with no overhead.
239  */
240 struct kvm_x86_cpu_property {
241 	u32	function;
242 	u8	index;
243 	u8	reg;
244 	u8	lo_bit;
245 	u8	hi_bit;
246 };
247 #define	KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit)			\
248 ({										\
249 	struct kvm_x86_cpu_property property = {				\
250 		.function = fn,							\
251 		.index = idx,							\
252 		.reg = KVM_CPUID_##gpr,						\
253 		.lo_bit = low_bit,						\
254 		.hi_bit = high_bit,						\
255 	};									\
256 										\
257 	kvm_static_assert(low_bit < high_bit);					\
258 	kvm_static_assert((fn & 0xc0000000) == 0 ||				\
259 			  (fn & 0xc0000000) == 0x40000000 ||			\
260 			  (fn & 0xc0000000) == 0x80000000 ||			\
261 			  (fn & 0xc0000000) == 0xc0000000);			\
262 	kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE));	\
263 	property;								\
264 })
265 
/*
 * Multi-bit CPUID fields.  Arguments are (function, index, register,
 * low bit, high bit), see KVM_X86_CPU_PROPERTY().  The four *_MAX_*_LEAF
 * properties are the ones this_cpu_has_p() reads to bound each function class.
 */
#define X86_PROPERTY_MAX_BASIC_LEAF		KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
#define X86_PROPERTY_PMU_VERSION		KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
#define X86_PROPERTY_PMU_NR_GP_COUNTERS		KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH	KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH	KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
#define X86_PROPERTY_PMU_EVENTS_MASK		KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK	KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS	KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH	KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)

#define X86_PROPERTY_SUPPORTED_XCR0_LO		KVM_X86_CPU_PROPERTY(0xd,  0, EAX,  0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0	KVM_X86_CPU_PROPERTY(0xd,  0, EBX,  0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE		KVM_X86_CPU_PROPERTY(0xd,  0, ECX,  0, 31)
#define X86_PROPERTY_SUPPORTED_XCR0_HI		KVM_X86_CPU_PROPERTY(0xd,  0, EDX,  0, 31)

#define X86_PROPERTY_XSTATE_TILE_SIZE		KVM_X86_CPU_PROPERTY(0xd, 18, EAX,  0, 31)
#define X86_PROPERTY_XSTATE_TILE_OFFSET		KVM_X86_CPU_PROPERTY(0xd, 18, EBX,  0, 31)
#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES	KVM_X86_CPU_PROPERTY(0x1d, 0, EAX,  0, 31)
#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES	KVM_X86_CPU_PROPERTY(0x1d, 1, EAX,  0, 15)
#define X86_PROPERTY_AMX_BYTES_PER_TILE		KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
#define X86_PROPERTY_AMX_BYTES_PER_ROW		KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0,  15)
#define X86_PROPERTY_AMX_NR_TILE_REGS		KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
#define X86_PROPERTY_AMX_MAX_ROWS		KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0,  15)

#define X86_PROPERTY_MAX_KVM_LEAF		KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)

#define X86_PROPERTY_MAX_EXT_LEAF		KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
#define X86_PROPERTY_MAX_PHY_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
#define X86_PROPERTY_MAX_VIRT_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
#define X86_PROPERTY_GUEST_MAX_PHY_ADDR		KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT			KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION	KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
#define X86_PROPERTY_NR_PERFCTR_CORE		KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
#define X86_PROPERTY_NR_PERFCTR_NB		KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)

#define X86_PROPERTY_MAX_CENTAUR_LEAF		KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)
302 
/*
 * Intel's architectural PMU events are bizarre.  They have a "feature" bit
 * that indicates the feature is _not_ supported, and a property that states
 * the length of the bit mask of unsupported features.  A feature is supported
 * if the size of the bit mask is larger than the "unavailable" bit, and said
 * bit is not set.  Fixed counters also have a bizarre enumeration, but
 * inverted from arch events for general purpose counters.  Fixed counters are
 * supported if a feature flag is set **OR** the total number of fixed
 * counters is greater than the index of the counter.
 *
 * Wrap the events for general purpose and fixed counters to simplify checking
 * whether or not a given architectural event is supported.
 */
struct kvm_x86_pmu_feature {
	struct kvm_x86_cpu_feature f;
};
/*
 * Build a PMU feature for bit @__bit of register @__reg in CPUID function
 * 0xa, sub-leaf 0.  Only EBX and ECX are accepted, enforced at build time.
 */
#define	KVM_X86_PMU_FEATURE(__reg, __bit)				\
({									\
	struct kvm_x86_pmu_feature feature = {				\
		.f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit),		\
	};								\
									\
	kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX ||		\
			  KVM_CPUID_##__reg == KVM_CPUID_ECX);		\
	feature;							\
})
329 
/* EBX entries; this_pmu_has() treats a set EBX bit as "event unavailable". */
#define X86_PMU_FEATURE_CPU_CYCLES			KVM_X86_PMU_FEATURE(EBX, 0)
#define X86_PMU_FEATURE_INSNS_RETIRED			KVM_X86_PMU_FEATURE(EBX, 1)
#define X86_PMU_FEATURE_REFERENCE_CYCLES		KVM_X86_PMU_FEATURE(EBX, 2)
#define X86_PMU_FEATURE_LLC_REFERENCES			KVM_X86_PMU_FEATURE(EBX, 3)
#define X86_PMU_FEATURE_LLC_MISSES			KVM_X86_PMU_FEATURE(EBX, 4)
#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED		KVM_X86_PMU_FEATURE(EBX, 5)
#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED		KVM_X86_PMU_FEATURE(EBX, 6)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS			KVM_X86_PMU_FEATURE(EBX, 7)
#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND		KVM_X86_PMU_FEATURE(EBX, 8)
#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC		KVM_X86_PMU_FEATURE(EBX, 9)
#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND		KVM_X86_PMU_FEATURE(EBX, 10)
#define X86_PMU_FEATURE_TOPDOWN_RETIRING		KVM_X86_PMU_FEATURE(EBX, 11)
#define X86_PMU_FEATURE_LBR_INSERTS			KVM_X86_PMU_FEATURE(EBX, 12)

/* ECX entries; this_pmu_has() treats a set ECX bit as "counter supported". */
#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED		KVM_X86_PMU_FEATURE(ECX, 0)
#define X86_PMU_FEATURE_CPU_CYCLES_FIXED		KVM_X86_PMU_FEATURE(ECX, 1)
#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED	KVM_X86_PMU_FEATURE(ECX, 2)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED		KVM_X86_PMU_FEATURE(ECX, 3)
348 
/*
 * Decode the family from a family/model/stepping value @eax: bits 11:8 are
 * the base family, and bits 27:20 are added only when the base is 0xf.
 */
static inline unsigned int x86_family(unsigned int eax)
{
	const unsigned int base_family = (eax >> 8) & 0xf;
	const unsigned int ext_family = (eax >> 20) & 0xff;

	return base_family == 0xf ? base_family + ext_family : base_family;
}
360 
/*
 * Decode the model from a family/model/stepping value @eax: bits 19:16 form
 * the high nibble and bits 7:4 the low nibble of the result.
 */
static inline unsigned int x86_model(unsigned int eax)
{
	const unsigned int high_nibble = (eax >> 12) & 0xf0;
	const unsigned int low_nibble = (eax >> 4) & 0x0f;

	return high_nibble | low_nibble;
}
365 
/* Bits 51:12, i.e. the physical-address bits this header keeps from a PTE. */
#define PHYSICAL_PAGE_MASK      GENMASK_ULL(51, 12)

/* 4KiB base page.  PAGE_MASK is additionally clipped to PHYSICAL_PAGE_MASK. */
#define PAGE_SHIFT		12
#define PAGE_SIZE		(1ULL << PAGE_SHIFT)
#define PAGE_MASK		(~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)

/*
 * Each level @x adds 9 bits to the shift: x == 1 gives 12, x == 2 gives 21,
 * x == 3 gives 30.
 */
#define HUGEPAGE_SHIFT(x)	(PAGE_SHIFT + (((x) - 1) * 9))
#define HUGEPAGE_SIZE(x)	(1UL << HUGEPAGE_SHIFT(x))
#define HUGEPAGE_MASK(x)	(~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)

/* Extract the physical address, or the page frame number, from @pte. */
#define PTE_GET_PA(pte)		((pte) & PHYSICAL_PAGE_MASK)
#define PTE_GET_PFN(pte)        (PTE_GET_PA(pte) >> PAGE_SHIFT)
378 
/*
 * General Registers in 64-Bit Mode
 *
 * Sixteen u64 slots.  Note the order is rax, rcx, rdx, rbx, ..., not
 * alphabetical.
 */
struct gpr64_regs {
	u64 rax;
	u64 rcx;
	u64 rdx;
	u64 rbx;
	u64 rsp;
	u64 rbp;
	u64 rsi;
	u64 rdi;
	u64 r8;
	u64 r9;
	u64 r10;
	u64 r11;
	u64 r12;
	u64 r13;
	u64 r14;
	u64 r15;
};
398 
/*
 * Packed 16-byte descriptor.  The base address is split across base0 (bits
 * 15:0), base1 (23:16), base2 (31:24) and base3 (63:32), as reassembled by
 * get_desc64_base(); the limit is split across limit0 and limit1.
 */
struct desc64 {
	uint16_t limit0;
	uint16_t base0;
	unsigned base1:8, type:4, s:1, dpl:2, p:1;
	unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
	uint32_t base3;
	uint32_t zero1;
} __attribute__((packed));
407 
/*
 * Packed 10-byte descriptor-table pointer (16-bit size, 64-bit address), the
 * memory operand used by the lidt/sgdt/sidt helpers in this header.
 */
struct desc_ptr {
	uint16_t size;
	uint64_t address;
} __attribute__((packed));
412 
/*
 * Bundle of per-vCPU state structures, passed to vcpu_save_state(),
 * vcpu_load_state() and kvm_x86_state_cleanup() declared in this header.
 */
struct kvm_x86_state {
	struct kvm_xsave *xsave;
	struct kvm_vcpu_events events;
	struct kvm_mp_state mp_state;
	struct kvm_regs regs;
	struct kvm_xcrs xcrs;
	struct kvm_sregs sregs;
	struct kvm_debugregs debugregs;
	/* The char array overlays 'nested' so the union is at least 16KiB. */
	union {
		struct kvm_nested_state nested;
		char nested_[16384];
	};
	/* NOTE(review): presumably last because kvm_msrs ends in a variable-length array - confirm. */
	struct kvm_msrs msrs;
};
427 
get_desc64_base(const struct desc64 * desc)428 static inline uint64_t get_desc64_base(const struct desc64 *desc)
429 {
430 	return (uint64_t)desc->base3 << 32 |
431 	       (uint64_t)desc->base2 << 24 |
432 	       (uint64_t)desc->base1 << 16 |
433 	       (uint64_t)desc->base0;
434 }
435 
/* Return the 64-bit value read by RDTSC, with an LFENCE on either side. */
static inline uint64_t rdtsc(void)
{
	uint32_t lo, hi;

	/*
	 * The leading LFENCE waits (on Intel CPUs) for all earlier
	 * instructions to have executed.  The trailing LFENCE is for software
	 * that needs RDTSC to execute before any later instruction.
	 */
	__asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(lo), "=d"(hi));

	return ((uint64_t)hi << 32) | lo;
}
450 
/*
 * Execute RDTSCP: return EDX:EAX as a 64-bit value and store ECX to @aux.
 */
static inline uint64_t rdtscp(uint32_t *aux)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdtscp" : "=a"(lo), "=d"(hi), "=c"(*aux));

	return ((uint64_t)hi << 32) | lo;
}
458 
/* Execute RDMSR for @msr and return EDX:EAX as a 64-bit value. */
static inline uint64_t rdmsr(uint32_t msr)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdmsr"
			     : "=a"(lo), "=d"(hi)
			     : "c"(msr)
			     : "memory");

	return ((uint64_t)hi << 32) | lo;
}
467 
/* Execute WRMSR for @msr, passing @value split into EDX:EAX. */
static inline void wrmsr(uint32_t msr, uint64_t value)
{
	const uint32_t lo = value;
	const uint32_t hi = value >> 32;

	__asm__ __volatile__("wrmsr"
			     : /* no outputs */
			     : "a"(lo), "d"(hi), "c"(msr)
			     : "memory");
}
475 
476 
/* Read a 16-bit value from I/O port @port (IN with DX as port, AX as result). */
static inline uint16_t inw(uint16_t port)
{
	uint16_t value;

	__asm__ __volatile__("in %%dx, %%ax" : "=a"(value) : "d"(port));

	return value;
}
487 
/* Return the current ES segment selector. */
static inline uint16_t get_es(void)
{
	uint16_t selector;

	__asm__ __volatile__("mov %%es, %0" : "=rm"(selector));

	return selector;
}
496 
/* Return the current CS segment selector. */
static inline uint16_t get_cs(void)
{
	uint16_t selector;

	__asm__ __volatile__("mov %%cs, %0" : "=rm"(selector));

	return selector;
}
505 
/* Return the current SS segment selector. */
static inline uint16_t get_ss(void)
{
	uint16_t selector;

	__asm__ __volatile__("mov %%ss, %0" : "=rm"(selector));

	return selector;
}
514 
/* Return the current DS segment selector. */
static inline uint16_t get_ds(void)
{
	uint16_t selector;

	__asm__ __volatile__("mov %%ds, %0" : "=rm"(selector));

	return selector;
}
523 
/* Return the current FS segment selector. */
static inline uint16_t get_fs(void)
{
	uint16_t selector;

	__asm__ __volatile__("mov %%fs, %0" : "=rm"(selector));

	return selector;
}
532 
/* Return the current GS segment selector. */
static inline uint16_t get_gs(void)
{
	uint16_t selector;

	__asm__ __volatile__("mov %%gs, %0" : "=rm"(selector));

	return selector;
}
541 
/* Return the 16-bit value stored by the STR instruction. */
static inline uint16_t get_tr(void)
{
	uint16_t selector;

	__asm__ __volatile__("str %0" : "=rm"(selector));

	return selector;
}
550 
/* Return the current value of CR0. */
static inline uint64_t get_cr0(void)
{
	uint64_t val;

	__asm__ __volatile__("mov %%cr0, %0" : "=r"(val));

	return val;
}
559 
/* Load @val into CR0; the "memory" clobber acts as a compiler barrier. */
static inline void set_cr0(uint64_t val)
{
	__asm__ __volatile__("mov %[val], %%cr0"
			     : /* no outputs */
			     : [val]"r"(val)
			     : "memory");
}
564 
/* Return the current value of CR3. */
static inline uint64_t get_cr3(void)
{
	uint64_t val;

	__asm__ __volatile__("mov %%cr3, %0" : "=r"(val));

	return val;
}
573 
/* Load @val into CR3; the "memory" clobber acts as a compiler barrier. */
static inline void set_cr3(uint64_t val)
{
	__asm__ __volatile__("mov %[val], %%cr3"
			     : /* no outputs */
			     : [val]"r"(val)
			     : "memory");
}
578 
/* Return the current value of CR4. */
static inline uint64_t get_cr4(void)
{
	uint64_t val;

	__asm__ __volatile__("mov %%cr4, %0" : "=r"(val));

	return val;
}
587 
/* Load @val into CR4; the "memory" clobber acts as a compiler barrier. */
static inline void set_cr4(uint64_t val)
{
	__asm__ __volatile__("mov %[val], %%cr4"
			     : /* no outputs */
			     : [val]"r"(val)
			     : "memory");
}
592 
/* Return the current value of CR8. */
static inline uint64_t get_cr8(void)
{
	uint64_t val;

	__asm__ __volatile__("mov %%cr8, %0" : "=r"(val));

	return val;
}
600 
/* Load @val into CR8; the "memory" clobber acts as a compiler barrier. */
static inline void set_cr8(uint64_t val)
{
	__asm__ __volatile__("mov %[val], %%cr8"
			     : /* no outputs */
			     : [val]"r"(val)
			     : "memory");
}
605 
/* Execute LIDT with the descriptor-table pointer at @idt_desc. */
static inline void set_idt(const struct desc_ptr *idt_desc)
{
	__asm__ __volatile__("lidt %[desc]"
			     : /* no outputs */
			     : [desc]"m"(*idt_desc));
}
610 
xgetbv(u32 index)611 static inline u64 xgetbv(u32 index)
612 {
613 	u32 eax, edx;
614 
615 	__asm__ __volatile__("xgetbv;"
616 		     : "=a" (eax), "=d" (edx)
617 		     : "c" (index));
618 	return eax | ((u64)edx << 32);
619 }
620 
/* Execute XSETBV with ECX = @index and @value split into EDX:EAX. */
static inline void xsetbv(u32 index, u64 value)
{
	const u32 lo = value;
	const u32 hi = value >> 32;

	__asm__ __volatile__("xsetbv"
			     : /* no outputs */
			     : "a"(lo), "d"(hi), "c"(index));
}
628 
/*
 * Write @pkru using the raw opcode bytes 0f 01 ef, with EAX = @pkru.
 */
static inline void wrpkru(u32 pkru)
{
	/* ECX and EDX are architecturally required to be '0', hence the inputs. */
	__asm__ __volatile__(".byte 0x0f,0x01,0xef\n\t"
			     : /* no outputs */
			     : "a"(pkru), "c"(0), "d"(0));
}
635 
get_gdt(void)636 static inline struct desc_ptr get_gdt(void)
637 {
638 	struct desc_ptr gdt;
639 	__asm__ __volatile__("sgdt %[gdt]"
640 			     : /* output */ [gdt]"=m"(gdt));
641 	return gdt;
642 }
643 
get_idt(void)644 static inline struct desc_ptr get_idt(void)
645 {
646 	struct desc_ptr idt;
647 	__asm__ __volatile__("sidt %[idt]"
648 			     : /* output */ [idt]"=m"(idt));
649 	return idt;
650 }
651 
/* Write the 32-bit @value to I/O port @port (OUT with EAX as data, DX as port). */
static inline void outl(uint16_t port, uint32_t value)
{
	__asm__ __volatile__("outl %%eax, %%dx"
			     : /* no outputs */
			     : "a"(value), "d"(port));
}
656 
/*
 * Execute CPUID for leaf @function and sub-leaf @index, storing the resulting
 * EAX, EBX, ECX and EDX through the four output pointers.
 */
static inline void __cpuid(uint32_t function, uint32_t index,
			   uint32_t *eax, uint32_t *ebx,
			   uint32_t *ecx, uint32_t *edx)
{
	/* Seed the two input registers through the output locations. */
	*eax = function;
	*ecx = index;

	/*
	 * The "0" and "2" input constraints tie *eax and *ecx to the same
	 * registers as outputs %0 (EAX) and %2 (ECX).
	 */
	asm volatile("cpuid"
	    : "=a" (*eax),
	      "=b" (*ebx),
	      "=c" (*ecx),
	      "=d" (*edx)
	    : "0" (*eax), "2" (*ecx)
	    : "memory");
}
672 
/*
 * Execute CPUID for leaf @function with sub-leaf 0, storing the resulting
 * registers through @eax, @ebx, @ecx and @edx.
 */
static inline void cpuid(uint32_t function,
			 uint32_t *eax, uint32_t *ebx,
			 uint32_t *ecx, uint32_t *edx)
{
	/* Plain call: a void function must not return an expression in ISO C. */
	__cpuid(function, 0, eax, ebx, ecx, edx);
}
679 
/* Return EAX of CPUID function 1; the other three registers are discarded. */
static inline uint32_t this_cpu_fms(void)
{
	uint32_t fms, unused_ebx, unused_ecx, unused_edx;

	cpuid(1, &fms, &unused_ebx, &unused_ecx, &unused_edx);

	return fms;
}
687 
/* Return x86_family() of the value reported by this_cpu_fms(). */
static inline uint32_t this_cpu_family(void)
{
	const uint32_t fms = this_cpu_fms();

	return x86_family(fms);
}
692 
/* Return x86_model() of the value reported by this_cpu_fms(). */
static inline uint32_t this_cpu_model(void)
{
	const uint32_t fms = this_cpu_fms();

	return x86_model(fms);
}
697 
/*
 * Return true if the 12 bytes at @vendor match the vendor string reported by
 * CPUID function 0, compared in EBX, EDX, ECX order.
 *
 * @vendor must point to at least 12 readable bytes.
 */
static inline bool this_cpu_vendor_string_is(const char *vendor)
{
	uint32_t chunk[3];
	uint32_t eax, ebx, ecx, edx;

	/*
	 * Copy the bytes instead of reading them through a uint32_t pointer:
	 * a char array has no alignment guarantee and must not be accessed as
	 * uint32_t under the strict aliasing rules.
	 */
	__builtin_memcpy(chunk, vendor, sizeof(chunk));

	cpuid(0, &eax, &ebx, &ecx, &edx);
	return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}
706 
/* Return true if the CPUID vendor string is "GenuineIntel". */
static inline bool this_cpu_is_intel(void)
{
	const char *vendor = "GenuineIntel";

	return this_cpu_vendor_string_is(vendor);
}
711 
/*
 * Return true if the CPUID vendor string is "AuthenticAMD".  This
 * deliberately excludes early K5 samples, whose vendor string is
 * "AMDisbetter!".
 */
static inline bool this_cpu_is_amd(void)
{
	const char *vendor = "AuthenticAMD";

	return this_cpu_vendor_string_is(vendor);
}
719 
__this_cpu_has(uint32_t function,uint32_t index,uint8_t reg,uint8_t lo,uint8_t hi)720 static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
721 				      uint8_t reg, uint8_t lo, uint8_t hi)
722 {
723 	uint32_t gprs[4];
724 
725 	__cpuid(function, index,
726 		&gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
727 		&gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);
728 
729 	return (gprs[reg] & GENMASK(hi, lo)) >> lo;
730 }
731 
/* Return true if the single CPUID bit described by @feature is set. */
static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
{
	/* A feature is a one-bit field, so lo and hi are the same bit. */
	const uint8_t bit = feature.bit;

	return __this_cpu_has(feature.function, feature.index,
			      feature.reg, bit, bit);
}
737 
/* Return the value of the multi-bit CPUID field described by @property. */
static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
{
	const uint8_t lo = property.lo_bit;
	const uint8_t hi = property.hi_bit;

	return __this_cpu_has(property.function, property.index,
			      property.reg, lo, hi);
}
743 
this_cpu_has_p(struct kvm_x86_cpu_property property)744 static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
745 {
746 	uint32_t max_leaf;
747 
748 	switch (property.function & 0xc0000000) {
749 	case 0:
750 		max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
751 		break;
752 	case 0x40000000:
753 		max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
754 		break;
755 	case 0x80000000:
756 		max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
757 		break;
758 	case 0xc0000000:
759 		max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
760 	}
761 	return max_leaf >= property.function;
762 }
763 
this_pmu_has(struct kvm_x86_pmu_feature feature)764 static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
765 {
766 	uint32_t nr_bits;
767 
768 	if (feature.f.reg == KVM_CPUID_EBX) {
769 		nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
770 		return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
771 	}
772 
773 	GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
774 	nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
775 	return nr_bits > feature.f.bit || this_cpu_has(feature.f);
776 }
777 
this_cpu_supported_xcr0(void)778 static __always_inline uint64_t this_cpu_supported_xcr0(void)
779 {
780 	if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
781 		return 0;
782 
783 	return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
784 	       ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
785 }
786 
/* 16-byte vector of u32 lanes, the type moved by read/write_sse_reg(). */
typedef u32		__attribute__((vector_size(16))) sse128_t;
/* Anonymous union used to view a vector as two u64s or four u32s. */
#define __sse128_u	union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
/* Evaluate to as_u64[0] / as_u64[1] of vector @x, respectively. */
#define sse128_lo(x)	({ __sse128_u t; t.vec = x; t.as_u64[0]; })
#define sse128_hi(x)	({ __sse128_u t; t.vec = x; t.as_u64[1]; })
791 
/*
 * Store XMM register number @reg (0-7) to *@data using MOVDQA.  Any other
 * @reg value hits BUG().
 */
static inline void read_sse_reg(int reg, sse128_t *data)
{
/* One switch case per register; the register name must be a literal. */
#define READ_XMM_CASE(n)						\
	case n:								\
		asm("movdqa %%xmm" #n ", %0" : "=m"(*data));		\
		break

	switch (reg) {
	READ_XMM_CASE(0);
	READ_XMM_CASE(1);
	READ_XMM_CASE(2);
	READ_XMM_CASE(3);
	READ_XMM_CASE(4);
	READ_XMM_CASE(5);
	READ_XMM_CASE(6);
	READ_XMM_CASE(7);
	default:
		BUG();
	}

#undef READ_XMM_CASE
}
823 
write_sse_reg(int reg,const sse128_t * data)824 static inline void write_sse_reg(int reg, const sse128_t *data)
825 {
826 	switch (reg) {
827 	case 0:
828 		asm("movdqa %0, %%xmm0" : : "m"(*data));
829 		break;
830 	case 1:
831 		asm("movdqa %0, %%xmm1" : : "m"(*data));
832 		break;
833 	case 2:
834 		asm("movdqa %0, %%xmm2" : : "m"(*data));
835 		break;
836 	case 3:
837 		asm("movdqa %0, %%xmm3" : : "m"(*data));
838 		break;
839 	case 4:
840 		asm("movdqa %0, %%xmm4" : : "m"(*data));
841 		break;
842 	case 5:
843 		asm("movdqa %0, %%xmm5" : : "m"(*data));
844 		break;
845 	case 6:
846 		asm("movdqa %0, %%xmm6" : : "m"(*data));
847 		break;
848 	case 7:
849 		asm("movdqa %0, %%xmm7" : : "m"(*data));
850 		break;
851 	default:
852 		BUG();
853 	}
854 }
855 
/*
 * Execute "rep; nop" (referred to as PAUSE in udelay()'s comment); the
 * "memory" clobber also makes this a compiler barrier.
 */
static inline void cpu_relax(void)
{
	__asm__ __volatile__("rep; nop"
			     : /* no outputs */
			     : /* no inputs */
			     : "memory");
}
860 
udelay(unsigned long usec)861 static inline void udelay(unsigned long usec)
862 {
863 	uint64_t start, now, cycles;
864 
865 	GUEST_ASSERT(guest_tsc_khz);
866 	cycles = guest_tsc_khz / 1000 * usec;
867 
868 	/*
869 	 * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
870 	 * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
871 	 */
872 	start = rdtsc();
873 	do {
874 		now = rdtsc();
875 	} while (now - start < cycles);
876 }
877 
/* Emit a UD2 instruction. */
#define ud2()	__asm__ __volatile__("ud2\n")
882 
/* Emit a HLT instruction. */
#define hlt()	__asm__ __volatile__("hlt\n")
887 
/*
 * Out-of-line helpers for saving and restoring vCPU state.  Only the
 * prototypes live in this header; struct kvm_x86_state is handled purely by
 * pointer here.
 * NOTE(review): presumably a state returned by vcpu_save_state() is released
 * with kvm_x86_state_cleanup() -- confirm against the implementation.
 */
struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
void kvm_x86_state_cleanup(struct kvm_x86_state *state);

/*
 * Out-of-line accessors for KVM's MSR index lists and feature MSRs.
 * NOTE(review): exact semantics are defined by the implementation, which is
 * not visible from this header.
 */
const struct kvm_msr_list *kvm_get_msr_index_list(void);
const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
uint64_t kvm_get_feature_msr(uint64_t msr_index);
896 
vcpu_msrs_get(struct kvm_vcpu * vcpu,struct kvm_msrs * msrs)897 static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
898 				 struct kvm_msrs *msrs)
899 {
900 	int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);
901 
902 	TEST_ASSERT(r == msrs->nmsrs,
903 		    "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
904 		    r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
905 }
vcpu_msrs_set(struct kvm_vcpu * vcpu,struct kvm_msrs * msrs)906 static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
907 {
908 	int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);
909 
910 	TEST_ASSERT(r == msrs->nmsrs,
911 		    "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
912 		    r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
913 }
vcpu_debugregs_get(struct kvm_vcpu * vcpu,struct kvm_debugregs * debugregs)914 static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
915 				      struct kvm_debugregs *debugregs)
916 {
917 	vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
918 }
vcpu_debugregs_set(struct kvm_vcpu * vcpu,struct kvm_debugregs * debugregs)919 static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
920 				      struct kvm_debugregs *debugregs)
921 {
922 	vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
923 }
vcpu_xsave_get(struct kvm_vcpu * vcpu,struct kvm_xsave * xsave)924 static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
925 				  struct kvm_xsave *xsave)
926 {
927 	vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
928 }
vcpu_xsave2_get(struct kvm_vcpu * vcpu,struct kvm_xsave * xsave)929 static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
930 				   struct kvm_xsave *xsave)
931 {
932 	vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
933 }
vcpu_xsave_set(struct kvm_vcpu * vcpu,struct kvm_xsave * xsave)934 static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
935 				  struct kvm_xsave *xsave)
936 {
937 	vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
938 }
vcpu_xcrs_get(struct kvm_vcpu * vcpu,struct kvm_xcrs * xcrs)939 static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
940 				 struct kvm_xcrs *xcrs)
941 {
942 	vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
943 }
vcpu_xcrs_set(struct kvm_vcpu * vcpu,struct kvm_xcrs * xcrs)944 static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
945 {
946 	vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
947 }
948 
/*
 * Look up the entry for @function/@index in @cpuid (implemented out of line).
 * Callers in this header dereference the result without a NULL check.
 * NOTE(review): what happens when no entry matches is decided by the
 * implementation -- confirm before relying on it.
 */
const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
					       uint32_t function, uint32_t index);
/*
 * Return the CPUID table that the kvm_cpu_*() helpers in this header query.
 * NOTE(review): presumably KVM's supported CPUID -- confirm.
 */
const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);
952 
kvm_cpu_fms(void)953 static inline uint32_t kvm_cpu_fms(void)
954 {
955 	return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
956 }
957 
/* Decode the CPU family from kvm_cpu_fms() via x86_family(). */
static inline uint32_t kvm_cpu_family(void)
{
	const uint32_t fms = kvm_cpu_fms();

	return x86_family(fms);
}
962 
/* Decode the CPU model from kvm_cpu_fms() via x86_model(). */
static inline uint32_t kvm_cpu_model(void)
{
	const uint32_t fms = kvm_cpu_fms();

	return x86_model(fms);
}
967 
/* Query @feature in the CPUID table @cpuid; implemented out of line. */
bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
		   struct kvm_x86_cpu_feature feature);
970 
/* Query @feature in the table returned by kvm_get_supported_cpuid(). */
static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
{
	const struct kvm_cpuid2 *supported = kvm_get_supported_cpuid();

	return kvm_cpuid_has(supported, feature);
}
975 
/* Read the value of @property from the CPUID table @cpuid; implemented out of line. */
uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
			    struct kvm_x86_cpu_property property);
978 
/* Read @property from the table returned by kvm_get_supported_cpuid(). */
static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
{
	const struct kvm_cpuid2 *supported = kvm_get_supported_cpuid();

	return kvm_cpuid_property(supported, property);
}
983 
kvm_cpu_has_p(struct kvm_x86_cpu_property property)984 static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
985 {
986 	uint32_t max_leaf;
987 
988 	switch (property.function & 0xc0000000) {
989 	case 0:
990 		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
991 		break;
992 	case 0x40000000:
993 		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
994 		break;
995 	case 0x80000000:
996 		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
997 		break;
998 	case 0xc0000000:
999 		max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
1000 	}
1001 	return max_leaf >= property.function;
1002 }
1003 
kvm_pmu_has(struct kvm_x86_pmu_feature feature)1004 static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
1005 {
1006 	uint32_t nr_bits;
1007 
1008 	if (feature.f.reg == KVM_CPUID_EBX) {
1009 		nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
1010 		return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
1011 	}
1012 
1013 	TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
1014 	nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
1015 	return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
1016 }
1017 
kvm_cpu_supported_xcr0(void)1018 static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
1019 {
1020 	if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
1021 		return 0;
1022 
1023 	return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
1024 	       ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
1025 }
1026 
kvm_cpuid2_size(int nr_entries)1027 static inline size_t kvm_cpuid2_size(int nr_entries)
1028 {
1029 	return sizeof(struct kvm_cpuid2) +
1030 	       sizeof(struct kvm_cpuid_entry2) * nr_entries;
1031 }
1032 
1033 /*
1034  * Allocate a "struct kvm_cpuid2* instance, with the 0-length arrary of
1035  * entries sized to hold @nr_entries.  The caller is responsible for freeing
1036  * the struct.
1037  */
allocate_kvm_cpuid2(int nr_entries)1038 static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
1039 {
1040 	struct kvm_cpuid2 *cpuid;
1041 
1042 	cpuid = malloc(kvm_cpuid2_size(nr_entries));
1043 	TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");
1044 
1045 	cpuid->nent = nr_entries;
1046 
1047 	return cpuid;
1048 }
1049 
/*
 * Implemented out of line.  The accessors in this header assert that
 * vcpu->cpuid is non-NULL and name this function as the way to set it up.
 */
void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);
1051 
vcpu_get_cpuid(struct kvm_vcpu * vcpu)1052 static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
1053 {
1054 	vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
1055 }
1056 
__vcpu_get_cpuid_entry(struct kvm_vcpu * vcpu,uint32_t function,uint32_t index)1057 static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
1058 							      uint32_t function,
1059 							      uint32_t index)
1060 {
1061 	TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");
1062 
1063 	vcpu_get_cpuid(vcpu);
1064 
1065 	return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
1066 							  function, index);
1067 }
1068 
/* Shorthand for __vcpu_get_cpuid_entry() with an index of 0. */
static inline struct kvm_cpuid_entry2 *
vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function)
{
	return __vcpu_get_cpuid_entry(vcpu, function, 0);
}
1074 
__vcpu_set_cpuid(struct kvm_vcpu * vcpu)1075 static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
1076 {
1077 	int r;
1078 
1079 	TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
1080 	r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
1081 	if (r)
1082 		return r;
1083 
1084 	/* On success, refresh the cache to pick up adjustments made by KVM. */
1085 	vcpu_get_cpuid(vcpu);
1086 	return 0;
1087 }
1088 
vcpu_set_cpuid(struct kvm_vcpu * vcpu)1089 static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
1090 {
1091 	TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
1092 	vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
1093 
1094 	/* Refresh the cache to pick up adjustments made by KVM. */
1095 	vcpu_get_cpuid(vcpu);
1096 }
1097 
/*
 * Out-of-line helpers that modify a vCPU's CPUID.
 * NOTE(review): whether these also push the result to KVM is decided by the
 * implementation, which is not visible from this header.
 */
void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
			     struct kvm_x86_cpu_property property,
			     uint32_t value);
void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);

void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);
1104 
vcpu_cpuid_has(struct kvm_vcpu * vcpu,struct kvm_x86_cpu_feature feature)1105 static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
1106 				  struct kvm_x86_cpu_feature feature)
1107 {
1108 	struct kvm_cpuid_entry2 *entry;
1109 
1110 	entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
1111 	return *((&entry->eax) + feature.reg) & BIT(feature.bit);
1112 }
1113 
/*
 * Set (@set == true) or clear (@set == false) @feature for @vcpu; implemented
 * out of line.
 */
void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
				     struct kvm_x86_cpu_feature feature,
				     bool set);
1117 
/* Set @feature for @vcpu via vcpu_set_or_clear_cpuid_feature(). */
static inline void
vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_feature feature)
{
	vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
}
1124 
/* Clear @feature for @vcpu via vcpu_set_or_clear_cpuid_feature(). */
static inline void
vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu, struct kvm_x86_cpu_feature feature)
{
	vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
}
1130 
/*
 * Out-of-line single-MSR accessors.  The vcpu_set_msr() macro in this header
 * treats a return value of 1 from _vcpu_set_msr() as success.
 */
uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);
1133 
/*
 * Assert on an MSR access(es) and pretty print the MSR name when possible.
 * Note, the caller provides the stringified name so that the name of macro is
 * printed, not the value the macro resolves to (due to macro expansion).
 *
 * If @msr is a compile-time constant, @str is passed as the first format
 * argument.  Otherwise the string "MSR 0x<hex>" is formatted into a local
 * buffer and passed instead; this only happens when @cond is false.  @fmt
 * must therefore consume a string first, followed by @args.
 *
 * Note, @cond is evaluated twice when @msr is not constant and the first
 * evaluation is false.
 */
#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...)				\
do {										\
	if (__builtin_constant_p(msr)) {					\
		TEST_ASSERT(cond, fmt, str, args);				\
	} else if (!(cond)) {							\
		char buf[16];							\
										\
		snprintf(buf, sizeof(buf), "MSR 0x%x", msr);			\
		TEST_ASSERT(cond, fmt, buf, args);				\
	}									\
} while (0)
1150 
1151 /*
1152  * Returns true if KVM should return the last written value when reading an MSR
1153  * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
1154  * is changing, etc.  This is NOT an exhaustive list!  The intent is to filter
1155  * out MSRs that are not durable _and_ that a selftest wants to write.
1156  */
is_durable_msr(uint32_t msr)1157 static inline bool is_durable_msr(uint32_t msr)
1158 {
1159 	return msr != MSR_IA32_TSC;
1160 }
1161 
/*
 * Write @val to @msr on @vcpu and assert that _vcpu_set_msr() returns 1.  If
 * is_durable_msr() says the MSR is durable, also read it back and assert
 * that the value read matches the value written; otherwise the 'break'
 * leaves the macro right after the write.
 *
 * Note, @msr is expanded several times, so it must not have side effects.
 */
#define vcpu_set_msr(vcpu, msr, val)							\
do {											\
	uint64_t r, v = val;								\
											\
	TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1,				\
			"KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v);	\
	if (!is_durable_msr(msr))							\
		break;									\
	r = vcpu_get_msr(vcpu, msr);							\
	TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r);\
} while (0)
1173 
/*
 * Implemented out of line.
 * NOTE(review): kvm_get_cpu_address_width() presumably reports the physical
 * and virtual address widths through @pa_bits and @va_bits -- confirm.
 */
void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
void kvm_init_vm_address_properties(struct kvm_vm *vm);
1176 
/*
 * Register state handed to exception handlers installed with
 * vm_install_exception_handler().  Every field is 64 bits wide; the order of
 * the fields is significant and must not be changed.  Note that RSP is not
 * part of the general purpose register block.
 */
struct ex_regs {
	/* General purpose registers. */
	uint64_t rax;
	uint64_t rcx;
	uint64_t rdx;
	uint64_t rbx;
	uint64_t rbp;
	uint64_t rsi;
	uint64_t rdi;
	uint64_t r8;
	uint64_t r9;
	uint64_t r10;
	uint64_t r11;
	uint64_t r12;
	uint64_t r13;
	uint64_t r14;
	uint64_t r15;

	/* Exception information. */
	uint64_t vector;
	uint64_t error_code;
	uint64_t rip;
	uint64_t cs;
	uint64_t rflags;
};
1188 
/*
 * 16-byte IDT gate descriptor.  The handler offset is split across offset0,
 * offset1 and offset2; the bit-fields between 'selector' and 'offset1' share
 * one 16-bit word.
 */
struct idt_entry {
	uint16_t offset0;
	uint16_t selector;

	/* Attribute word, low bits first. */
	uint16_t ist : 3;
	uint16_t : 5;
	uint16_t type : 4;
	uint16_t : 1;
	uint16_t dpl : 2;
	uint16_t p : 1;

	uint16_t offset1;
	uint32_t offset2;
	uint32_t reserved;
};
1201 
/*
 * Register @handler for exception @vector in @vm; implemented out of line.
 * The handler receives a pointer to the struct ex_regs for the exception.
 */
void vm_install_exception_handler(struct kvm_vm *vm, int vector,
			void (*handler)(struct ex_regs *));
1204 
/*
 * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be
 * used to signal "no exception".
 */
#define KVM_MAGIC_DE_VECTOR 0xff

/* If a toddler were to say "abracadabra". */
#define KVM_EXCEPTION_MAGIC 0xabacadabaULL
1213 
/*
 * KVM selftest exception fixup uses registers to coordinate with the exception
 * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
 * per-CPU data.  Using only registers avoids having to map memory into the
 * guest, doesn't require a valid, stable GS.base, and reduces the risk of
 * recursive faults when accessing memory in the handler.  The downside to
 * using registers is that it restricts what registers can be used by the actual
 * instruction.  But, selftests are 64-bit only, making register pressure a
 * minor concern.  Use r9-r11 as they are volatile, i.e. don't need to be saved
 * by the callee, and except for r11 are not implicit parameters to any
 * instructions.  Ideally, fixup would use r8-r10 and thus avoid implicit
 * parameters entirely, but Hyper-V's hypercall ABI uses r8 and testing Hyper-V
 * is higher priority than testing non-faulting SYSCALL/SYSRET.
 *
 * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
 * is guaranteed to be non-zero on fault.
 *
 * REGISTER INPUTS:
 * r9  = MAGIC
 * r10 = RIP
 * r11 = new RIP on fault
 *
 * REGISTER OUTPUTS:
 * r9  = exception vector (non-zero)
 * r10 = error code
 *
 * The sequence below loads the inputs, runs @insn (optionally preceded by
 * @fep) at label 1, and zeroes r9 if execution falls through, i.e. if no
 * fault redirected RIP to label 2.  At label 2, the low byte of r9 is stored
 * to %[vector] and r10 is stored to %[error_code].  Only r9 is zeroed on the
 * no-fault path, i.e. r10 still holds the address of label 1 and so
 * %[error_code] is meaningful only when %[vector] is non-zero.
 */
#define __KVM_ASM_SAFE(insn, fep)				\
	"mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t"	\
	"lea 1f(%%rip), %%r10\n\t"				\
	"lea 2f(%%rip), %%r11\n\t"				\
	fep "1: " insn "\n\t"					\
	"xor %%r9, %%r9\n\t"					\
	"2:\n\t"						\
	"mov  %%r9b, %[vector]\n\t"				\
	"mov  %%r10, %[error_code]\n\t"
1249 
/*
 * Fixup sequence for @insn, without and with the forced emulation prefix
 * (KVM_FEP) in front of the instruction.
 */
#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)

/*
 * Output operands and clobber list that go with the fixup sequence.  The
 * sequence stores a byte register to @v and the 64-bit r10 to @ec, so @v
 * should be an 8-bit object and @ec a 64-bit object.  r9-r11 are overwritten
 * by the sequence.
 */
#define KVM_ASM_SAFE_OUTPUTS(v, ec)	[vector] "=qm"(v), [error_code] "=rm"(ec)
#define KVM_ASM_SAFE_CLOBBERS	"r9", "r10", "r11"
1255 
/*
 * Execute @insn with exception fixup and evaluate to the exception vector
 * as a uint8_t, which is 0 if @insn did not fault.  The error code is
 * captured but discarded.  @inputs are passed through as the asm input
 * operands.
 */
#define kvm_asm_safe(insn, inputs...)					\
({									\
	uint64_t ign_error_code;					\
	uint8_t vector;							\
									\
	asm volatile(KVM_ASM_SAFE(insn)					\
		     : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)	\
		     : inputs						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
	vector;								\
})
1267 
/*
 * Like kvm_asm_safe(), but additionally stores the 64-bit error code register
 * into the caller-provided lvalue @error_code.  The stored value is
 * meaningful only when the resulting vector is non-zero.
 */
#define kvm_asm_safe_ec(insn, error_code, inputs...)			\
({									\
	uint8_t vector;							\
									\
	asm volatile(KVM_ASM_SAFE(insn)					\
		     : KVM_ASM_SAFE_OUTPUTS(vector, error_code)		\
		     : inputs						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
	vector;								\
})
1278 
/*
 * Same as kvm_asm_safe(), except that @insn is preceded by the forced
 * emulation prefix (KVM_FEP).  Evaluates to the exception vector, 0 if no
 * fault occurred; the error code is discarded.
 */
#define kvm_asm_safe_fep(insn, inputs...)				\
({									\
	uint64_t ign_error_code;					\
	uint8_t vector;							\
									\
	asm volatile(KVM_ASM_SAFE_FEP(insn)				\
		     : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code)	\
		     : inputs						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
	vector;								\
})
1290 
/*
 * Same as kvm_asm_safe_ec(), except that @insn is preceded by the forced
 * emulation prefix (KVM_FEP).  Evaluates to the exception vector and stores
 * the error code register into @error_code.
 */
#define kvm_asm_safe_ec_fep(insn, error_code, inputs...)		\
({									\
	uint8_t vector;							\
									\
	asm volatile(KVM_ASM_SAFE_FEP(insn)				\
		     : KVM_ASM_SAFE_OUTPUTS(vector, error_code)		\
		     : inputs						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
	vector;								\
})
1301 
/*
 * Define "uint8_t <insn>_safe<_fep>(uint32_t idx, uint64_t *val)", which
 * executes @insn with exception fixup, passing @idx in ECX and combining
 * EDX:EAX into *@val.  @_fep is appended to the function name and @_FEP to
 * KVM_ASM_SAFE to select the forced emulation variant.  The generated
 * function returns the exception vector, 0 if no fault occurred.
 *
 * Note, *@val is written unconditionally.  If @insn faulted, it did not
 * produce EDX:EAX, so callers should use *@val only when 0 is returned.
 */
#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)			\
static inline uint8_t insn##_safe ##_fep(uint32_t idx, uint64_t *val)	\
{									\
	uint64_t error_code;						\
	uint8_t vector;							\
	uint32_t a, d;							\
									\
	asm volatile(KVM_ASM_SAFE##_FEP(#insn)				\
		     : "=a"(a), "=d"(d),				\
		       KVM_ASM_SAFE_OUTPUTS(vector, error_code)		\
		     : "c"(idx)						\
		     : KVM_ASM_SAFE_CLOBBERS);				\
									\
	*val = (uint64_t)a | ((uint64_t)d << 32);			\
	return vector;							\
}
1318 
/*
 * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
 * use ECX as an input index, and EDX:EAX as a 64-bit output.
 */
#define BUILD_READ_U64_SAFE_HELPERS(insn)				\
	BUILD_READ_U64_SAFE_HELPER(insn, , )				\
	BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP)			\
1326 
/*
 * Instantiate rdmsr_safe(), rdpmc_safe() and xgetbv_safe(), plus the
 * corresponding *_safe_fep() forced emulation variants.
 */
BUILD_READ_U64_SAFE_HELPERS(rdmsr)
BUILD_READ_U64_SAFE_HELPERS(rdpmc)
BUILD_READ_U64_SAFE_HELPERS(xgetbv)
1330 
1331 static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
1332 {
1333 	return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
1334 }
1335 
xsetbv_safe(uint32_t index,uint64_t value)1336 static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
1337 {
1338 	u32 eax = value;
1339 	u32 edx = value >> 32;
1340 
1341 	return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
1342 }
1343 
/*
 * Implemented out of line.
 * NOTE(review): "TDP" presumably means two-dimensional paging (EPT/NPT) --
 * confirm against the implementation.
 */
bool kvm_is_tdp_enabled(void);
1345 
/* Read boolean parameter @param of the "kvm_intel" module. */
static inline bool
get_kvm_intel_param_bool(const char *param)
{
	return kvm_get_module_param_bool("kvm_intel", param);
}
1350 
/* Read boolean parameter @param of the "kvm_amd" module. */
static inline bool
get_kvm_amd_param_bool(const char *param)
{
	return kvm_get_module_param_bool("kvm_amd", param);
}
1355 
/* Read integer parameter @param of the "kvm_intel" module. */
static inline int
get_kvm_intel_param_integer(const char *param)
{
	return kvm_get_module_param_integer("kvm_intel", param);
}
1360 
/* Read integer parameter @param of the "kvm_amd" module. */
static inline int
get_kvm_amd_param_integer(const char *param)
{
	return kvm_get_module_param_integer("kvm_amd", param);
}
1365 
/* Return the value of KVM's boolean "enable_pmu" parameter. */
static inline bool
kvm_is_pmu_enabled(void)
{
	return get_kvm_param_bool("enable_pmu");
}
1370 
/* Return true if KVM's integer "force_emulation_prefix" parameter is non-zero. */
static inline bool kvm_is_forced_emulation_enabled(void)
{
	return get_kvm_param_integer("force_emulation_prefix") != 0;
}
1375 
/* Return the value of the boolean "unrestricted_guest" parameter of kvm_intel. */
static inline bool
kvm_is_unrestricted_guest_enabled(void)
{
	return get_kvm_intel_param_bool("unrestricted_guest");
}
1380 
/* Return the value of KVM's boolean "ignore_msrs" parameter. */
static inline bool
kvm_is_ignore_msrs(void)
{
	return get_kvm_param_bool("ignore_msrs");
}
1385 
/*
 * Implemented out of line.
 * NOTE(review): presumably returns a pointer to the PTE that maps @vaddr in
 * @vm -- confirm against the implementation.
 */
uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr);

/*
 * Hypercall helpers, implemented out of line.  kvm_hypercall() takes the
 * hypercall number and four arguments and returns the hypercall's result.
 * NOTE(review): xen_hypercall() presumably asserts on the result that
 * __xen_hypercall() returns -- confirm.
 */
uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
		       uint64_t a3);
uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
1392 
__kvm_hypercall_map_gpa_range(uint64_t gpa,uint64_t size,uint64_t flags)1393 static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
1394 						     uint64_t size, uint64_t flags)
1395 {
1396 	return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
1397 }
1398 
/*
 * Issue the KVM_HC_MAP_GPA_RANGE hypercall and assert, from within the guest,
 * that it returned 0.
 */
static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
					       uint64_t flags)
{
	uint64_t ret;

	ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);
	GUEST_ASSERT(!ret);
}
1406 
/*
 * Execute HLT inside an STI interrupt shadow so that a pending IRQ which is
 * meant to be the wake event is delivered *after* HLT has executed.  Modern
 * CPUs, except for a few oddballs that KVM is unlikely to run on, block IRQs
 * for one instruction after STI, *if* RFLAGS.IF=0 before STI.  Note, Intel
 * CPUs may block other events beyond regular IRQs, e.g. may block NMIs and
 * SMIs too.
 */
static inline void safe_halt(void)
{
	__asm__ __volatile__("sti; hlt");
}
1418 
/*
 * Enable interrupts and guarantee that they are evaluated by the time this
 * function returns, i.e. execute a NOP to consume the STI interrupt shadow.
 */
static inline void sti_nop(void)
{
	__asm__ __volatile__("sti; nop");
}
1427 
/*
 * Open an interrupt window of exactly one instruction (a NOP) so that the CPU
 * can process all interrupts that are already pending, then disable
 * interrupts again.
 */
static inline void sti_nop_cli(void)
{
	__asm__ __volatile__("sti; nop; cli");
}
1436 
/* Execute STI. */
static inline void sti(void)
{
	__asm__ __volatile__("sti");
}
1441 
/* Execute CLI. */
static inline void cli(void)
{
	__asm__ __volatile__("cli");
}
1446 
/*
 * Implemented out of line.  The wrapper macro passes the stringified
 * @xfeature expression as @name.
 */
void __vm_xsave_require_permission(uint64_t xfeature, const char *name);

#define vm_xsave_require_permission(xfeature)	\
	__vm_xsave_require_permission(xfeature, #xfeature)
1451 
/*
 * Page table levels, named after the amount of memory mapped by a single
 * entry at that level.  The numeric values are consumed by PG_LEVEL_SHIFT(),
 * i.e. PG_LEVEL_4K must be 1 and each subsequent level must be one higher.
 */
enum pg_level {
	PG_LEVEL_NONE	= 0,
	PG_LEVEL_4K	= 1,
	PG_LEVEL_2M	= 2,
	PG_LEVEL_1G	= 3,
	PG_LEVEL_512G	= 4,
	PG_LEVEL_256T	= 5,
};
1460 
/*
 * Convert a page table level (1 == 4K, 2 == 2M, ...) to the shift and to the
 * size, in bytes, of the region mapped by a single entry at that level: 12
 * bits for the first level plus 9 bits for each level above it.
 *
 * The argument is parenthesized so that compound expressions, e.g. a
 * conditional expression, expand correctly.
 */
#define PG_LEVEL_SHIFT(_level) (((_level) - 1) * 9 + 12)
#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))

#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)
1467 
/*
 * Accessors for the per-MMU PTE bit masks stored in (mmu)->arch.pte_masks.
 * The is_*_pte() helpers in this header treat a present mask of 0 as "this
 * PTE format has no PRESENT bit".
 */
#define PTE_PRESENT_MASK(mmu)		((mmu)->arch.pte_masks.present)
#define PTE_WRITABLE_MASK(mmu)		((mmu)->arch.pte_masks.writable)
#define PTE_USER_MASK(mmu)		((mmu)->arch.pte_masks.user)
#define PTE_READABLE_MASK(mmu)		((mmu)->arch.pte_masks.readable)
#define PTE_EXECUTABLE_MASK(mmu)	((mmu)->arch.pte_masks.executable)
#define PTE_ACCESSED_MASK(mmu)		((mmu)->arch.pte_masks.accessed)
#define PTE_DIRTY_MASK(mmu)		((mmu)->arch.pte_masks.dirty)
#define PTE_HUGE_MASK(mmu)		((mmu)->arch.pte_masks.huge)
#define PTE_NX_MASK(mmu)		((mmu)->arch.pte_masks.nx)
#define PTE_C_BIT_MASK(mmu)		((mmu)->arch.pte_masks.c)
#define PTE_S_BIT_MASK(mmu)		((mmu)->arch.pte_masks.s)
#define PTE_ALWAYS_SET_MASK(mmu)	((mmu)->arch.pte_masks.always_set)
1480 
/*
 * PTE predicates.  @pte is a pointer to the PTE and is dereferenced by each
 * helper; @mmu supplies the bit masks.
 *
 * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
 * if it's executable or readable, as EPT supports execute-only PTEs, but not
 * write-only PTEs.
 *
 * A PTE is executable if the EXECUTABLE mask bits are set and the NX mask
 * bits are clear.  If one of the two masks is 0 for a given MMU, only the
 * other one is effectively checked.
 */
#define is_present_pte(mmu, pte)		\
	(PTE_PRESENT_MASK(mmu) ?		\
	 !!(*(pte) & PTE_PRESENT_MASK(mmu)) :	\
	 !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
#define is_executable_pte(mmu, pte)	\
	((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
#define is_writable_pte(mmu, pte)	(!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
#define is_user_pte(mmu, pte)		(!!(*(pte) & PTE_USER_MASK(mmu)))
#define is_accessed_pte(mmu, pte)	(!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
#define is_dirty_pte(mmu, pte)		(!!(*(pte) & PTE_DIRTY_MASK(mmu)))
#define is_huge_pte(mmu, pte)		(!!(*(pte) & PTE_HUGE_MASK(mmu)))
#define is_nx_pte(mmu, pte)		(!is_executable_pte(mmu, pte))
1498 
/*
 * Page table and TDP helpers, implemented out of line.
 * NOTE(review): the 'level' parameters presumably take enum pg_level values,
 * and "tdp" presumably refers to two-dimensional paging (EPT/NPT) -- confirm
 * against the implementation.
 */
void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
		  struct pte_masks *pte_masks);

void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
		   uint64_t paddr,  int level);
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
		    uint64_t nr_bytes, int level);

void vm_enable_tdp(struct kvm_vm *vm);
bool kvm_cpu_has_tdp(void);
void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void tdp_identity_map_default_memslots(struct kvm_vm *vm);
void tdp_identity_map_1g(struct kvm_vm *vm,  uint64_t addr, uint64_t size);
uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa);
1513 
/*
 * Basic CPU control in CR0
 */
#define X86_CR0_PE		(1UL << 0)	/* Protection Enable */
#define X86_CR0_MP		(1UL << 1)	/* Monitor Coprocessor */
#define X86_CR0_EM		(1UL << 2)	/* Emulation */
#define X86_CR0_TS		(1UL << 3)	/* Task Switched */
#define X86_CR0_ET		(1UL << 4)	/* Extension Type */
#define X86_CR0_NE		(1UL << 5)	/* Numeric Error */
#define X86_CR0_WP		(1UL << 16)	/* Write Protect */
#define X86_CR0_AM		(1UL << 18)	/* Alignment Mask */
#define X86_CR0_NW		(1UL << 29)	/* Not Write-through */
#define X86_CR0_CD		(1UL << 30)	/* Cache Disable */
#define X86_CR0_PG		(1UL << 31)	/* Paging */
1528 
/*
 * Bit positions within a page fault error code.  Bits 32 and above do not
 * fit in a 32-bit value, hence the BIT_ULL() masks further down.
 */
#define PFERR_PRESENT_BIT 0
#define PFERR_WRITE_BIT 1
#define PFERR_USER_BIT 2
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
#define PFERR_PK_BIT 5
#define PFERR_SGX_BIT 15
#define PFERR_GUEST_FINAL_BIT 32
#define PFERR_GUEST_PAGE_BIT 33
#define PFERR_IMPLICIT_ACCESS_BIT 48

/*
 * Masks for the bit positions above.  Note, the IMPLICIT_ACCESS mask has no
 * _MASK suffix.
 */
#define PFERR_PRESENT_MASK	BIT(PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK	BIT(PFERR_WRITE_BIT)
#define PFERR_USER_MASK		BIT(PFERR_USER_BIT)
#define PFERR_RSVD_MASK		BIT(PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK	BIT(PFERR_FETCH_BIT)
#define PFERR_PK_MASK		BIT(PFERR_PK_BIT)
#define PFERR_SGX_MASK		BIT(PFERR_SGX_BIT)
#define PFERR_GUEST_FINAL_MASK	BIT_ULL(PFERR_GUEST_FINAL_BIT)
#define PFERR_GUEST_PAGE_MASK	BIT_ULL(PFERR_GUEST_PAGE_BIT)
#define PFERR_IMPLICIT_ACCESS	BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)
1550 
/*
 * Implemented out of line.
 * NOTE(review): presumably reports whether the host's current clocksource is
 * derived from the TSC -- confirm against the implementation.
 */
bool sys_clocksource_is_based_on_tsc(void);
1552 
1553 #endif /* SELFTEST_KVM_PROCESSOR_H */
1554