/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Copyright (C) 2018, Google LLC.
 */

#ifndef SELFTEST_KVM_PROCESSOR_H
#define SELFTEST_KVM_PROCESSOR_H

#include <assert.h>
#include <stdint.h>
#include <syscall.h>

#include <asm/msr-index.h>
#include <asm/prctl.h>

#include <linux/kvm_para.h>
#include <linux/stringify.h>

#include "kvm_util.h"
#include "ucall_common.h"

extern bool host_cpu_is_intel;
extern bool host_cpu_is_amd;
extern uint64_t guest_tsc_khz;

#ifndef MAX_NR_CPUID_ENTRIES
#define MAX_NR_CPUID_ENTRIES 100
#endif

#define NONCANONICAL 0xaaaaaaaaaaaaaaaaull

/* Forced emulation prefix, used to invoke the emulator unconditionally. */
#define KVM_FEP "ud2; .byte 'k', 'v', 'm';"

#define NMI_VECTOR 0x02

const char *ex_str(int vector);

#define X86_EFLAGS_FIXED (1u << 1)

#define X86_CR4_VME (1ul << 0)
#define X86_CR4_PVI (1ul << 1)
#define X86_CR4_TSD (1ul << 2)
#define X86_CR4_DE (1ul << 3)
#define X86_CR4_PSE (1ul << 4)
#define X86_CR4_PAE (1ul << 5)
#define X86_CR4_MCE (1ul << 6)
#define X86_CR4_PGE (1ul << 7)
#define X86_CR4_PCE (1ul << 8)
#define X86_CR4_OSFXSR (1ul << 9)
#define X86_CR4_OSXMMEXCPT (1ul << 10)
#define X86_CR4_UMIP (1ul << 11)
#define X86_CR4_LA57 (1ul << 12)
#define X86_CR4_VMXE (1ul << 13)
#define X86_CR4_SMXE (1ul << 14)
#define X86_CR4_FSGSBASE (1ul << 16)
#define X86_CR4_PCIDE (1ul << 17)
#define X86_CR4_OSXSAVE (1ul << 18)
#define X86_CR4_SMEP (1ul << 20)
#define X86_CR4_SMAP (1ul << 21)
#define X86_CR4_PKE (1ul << 22)

struct xstate_header {
        u64 xstate_bv;
        u64 xcomp_bv;
        u64 reserved[6];
} __attribute__((packed));

struct xstate {
        u8 i387[512];
        struct xstate_header header;
        u8 extended_state_area[0];
} __attribute__ ((packed, aligned (64)));

#define XFEATURE_MASK_FP BIT_ULL(0)
#define XFEATURE_MASK_SSE BIT_ULL(1)
#define XFEATURE_MASK_YMM BIT_ULL(2)
#define XFEATURE_MASK_BNDREGS BIT_ULL(3)
#define XFEATURE_MASK_BNDCSR BIT_ULL(4)
#define XFEATURE_MASK_OPMASK BIT_ULL(5)
#define XFEATURE_MASK_ZMM_Hi256 BIT_ULL(6)
#define XFEATURE_MASK_Hi16_ZMM BIT_ULL(7)
#define XFEATURE_MASK_PT BIT_ULL(8)
#define XFEATURE_MASK_PKRU BIT_ULL(9)
#define XFEATURE_MASK_PASID BIT_ULL(10)
#define XFEATURE_MASK_CET_USER BIT_ULL(11)
#define XFEATURE_MASK_CET_KERNEL BIT_ULL(12)
#define XFEATURE_MASK_LBR BIT_ULL(15)
#define XFEATURE_MASK_XTILE_CFG BIT_ULL(17)
#define XFEATURE_MASK_XTILE_DATA BIT_ULL(18)

#define XFEATURE_MASK_AVX512 (XFEATURE_MASK_OPMASK | \
                              XFEATURE_MASK_ZMM_Hi256 | \
                              XFEATURE_MASK_Hi16_ZMM)
#define XFEATURE_MASK_XTILE (XFEATURE_MASK_XTILE_DATA | \
                             XFEATURE_MASK_XTILE_CFG)

/* Note, these are ordered alphabetically to match kvm_cpuid_entry2. Eww. */
enum cpuid_output_regs {
        KVM_CPUID_EAX,
        KVM_CPUID_EBX,
        KVM_CPUID_ECX,
        KVM_CPUID_EDX
};

/*
 * Pack the information into a 64-bit value so that each X86_FEATURE_XXX can be
 * passed by value with no overhead.
 */
struct kvm_x86_cpu_feature {
        u32 function;
        u16 index;
        u8 reg;
        u8 bit;
};
#define KVM_X86_CPU_FEATURE(fn, idx, gpr, __bit) \
({ \
        struct kvm_x86_cpu_feature feature = { \
                .function = fn, \
                .index = idx, \
                .reg = KVM_CPUID_##gpr, \
                .bit = __bit, \
        }; \
\
        kvm_static_assert((fn & 0xc0000000) == 0 || \
                          (fn & 0xc0000000) == 0x40000000 || \
                          (fn & 0xc0000000) == 0x80000000 || \
                          (fn & 0xc0000000) == 0xc0000000); \
        kvm_static_assert(idx < BIT(sizeof(feature.index) * BITS_PER_BYTE)); \
        feature; \
})
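
/*
 * Example usage (an illustrative sketch, not part of this header's API;
 * this_cpu_has(), kvm_cpu_has() and get_cr4() are all defined later in this
 * header). Feature structs are cheap to pass and compare by value:
 *
 *	TEST_REQUIRE(kvm_cpu_has(X86_FEATURE_SVM));	// host-side check
 *
 *	if (this_cpu_has(X86_FEATURE_OSXSAVE))		// guest-side check
 *		GUEST_ASSERT(get_cr4() & X86_CR4_OSXSAVE);
 */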

/*
 * Basic Leafs, a.k.a. Intel defined
 */
#define X86_FEATURE_MWAIT KVM_X86_CPU_FEATURE(0x1, 0, ECX, 3)
#define X86_FEATURE_VMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 5)
#define X86_FEATURE_SMX KVM_X86_CPU_FEATURE(0x1, 0, ECX, 6)
#define X86_FEATURE_PDCM KVM_X86_CPU_FEATURE(0x1, 0, ECX, 15)
#define X86_FEATURE_PCID KVM_X86_CPU_FEATURE(0x1, 0, ECX, 17)
#define X86_FEATURE_X2APIC KVM_X86_CPU_FEATURE(0x1, 0, ECX, 21)
#define X86_FEATURE_MOVBE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 22)
#define X86_FEATURE_TSC_DEADLINE_TIMER KVM_X86_CPU_FEATURE(0x1, 0, ECX, 24)
#define X86_FEATURE_XSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 26)
#define X86_FEATURE_OSXSAVE KVM_X86_CPU_FEATURE(0x1, 0, ECX, 27)
#define X86_FEATURE_RDRAND KVM_X86_CPU_FEATURE(0x1, 0, ECX, 30)
#define X86_FEATURE_HYPERVISOR KVM_X86_CPU_FEATURE(0x1, 0, ECX, 31)
#define X86_FEATURE_PAE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 6)
#define X86_FEATURE_MCE KVM_X86_CPU_FEATURE(0x1, 0, EDX, 7)
#define X86_FEATURE_APIC KVM_X86_CPU_FEATURE(0x1, 0, EDX, 9)
#define X86_FEATURE_CLFLUSH KVM_X86_CPU_FEATURE(0x1, 0, EDX, 19)
#define X86_FEATURE_XMM KVM_X86_CPU_FEATURE(0x1, 0, EDX, 25)
#define X86_FEATURE_XMM2 KVM_X86_CPU_FEATURE(0x1, 0, EDX, 26)
#define X86_FEATURE_FSGSBASE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 0)
#define X86_FEATURE_TSC_ADJUST KVM_X86_CPU_FEATURE(0x7, 0, EBX, 1)
#define X86_FEATURE_SGX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 2)
#define X86_FEATURE_HLE KVM_X86_CPU_FEATURE(0x7, 0, EBX, 4)
#define X86_FEATURE_SMEP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 7)
#define X86_FEATURE_INVPCID KVM_X86_CPU_FEATURE(0x7, 0, EBX, 10)
#define X86_FEATURE_RTM KVM_X86_CPU_FEATURE(0x7, 0, EBX, 11)
#define X86_FEATURE_MPX KVM_X86_CPU_FEATURE(0x7, 0, EBX, 14)
#define X86_FEATURE_SMAP KVM_X86_CPU_FEATURE(0x7, 0, EBX, 20)
#define X86_FEATURE_PCOMMIT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 22)
#define X86_FEATURE_CLFLUSHOPT KVM_X86_CPU_FEATURE(0x7, 0, EBX, 23)
#define X86_FEATURE_CLWB KVM_X86_CPU_FEATURE(0x7, 0, EBX, 24)
#define X86_FEATURE_UMIP KVM_X86_CPU_FEATURE(0x7, 0, ECX, 2)
#define X86_FEATURE_PKU KVM_X86_CPU_FEATURE(0x7, 0, ECX, 3)
#define X86_FEATURE_OSPKE KVM_X86_CPU_FEATURE(0x7, 0, ECX, 4)
#define X86_FEATURE_LA57 KVM_X86_CPU_FEATURE(0x7, 0, ECX, 16)
#define X86_FEATURE_RDPID KVM_X86_CPU_FEATURE(0x7, 0, ECX, 22)
#define X86_FEATURE_SGX_LC KVM_X86_CPU_FEATURE(0x7, 0, ECX, 30)
#define X86_FEATURE_SHSTK KVM_X86_CPU_FEATURE(0x7, 0, ECX, 7)
#define X86_FEATURE_IBT KVM_X86_CPU_FEATURE(0x7, 0, EDX, 20)
#define X86_FEATURE_AMX_TILE KVM_X86_CPU_FEATURE(0x7, 0, EDX, 24)
#define X86_FEATURE_SPEC_CTRL KVM_X86_CPU_FEATURE(0x7, 0, EDX, 26)
#define X86_FEATURE_ARCH_CAPABILITIES KVM_X86_CPU_FEATURE(0x7, 0, EDX, 29)
#define X86_FEATURE_PKS KVM_X86_CPU_FEATURE(0x7, 0, ECX, 31)
#define X86_FEATURE_XTILECFG KVM_X86_CPU_FEATURE(0xD, 0, EAX, 17)
#define X86_FEATURE_XTILEDATA KVM_X86_CPU_FEATURE(0xD, 0, EAX, 18)
#define X86_FEATURE_XSAVES KVM_X86_CPU_FEATURE(0xD, 1, EAX, 3)
#define X86_FEATURE_XFD KVM_X86_CPU_FEATURE(0xD, 1, EAX, 4)
#define X86_FEATURE_XTILEDATA_XFD KVM_X86_CPU_FEATURE(0xD, 18, ECX, 2)

/*
 * Extended Leafs, a.k.a. AMD defined
 */
#define X86_FEATURE_SVM KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 2)
#define X86_FEATURE_PERFCTR_CORE KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 23)
#define X86_FEATURE_PERFCTR_NB KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 24)
#define X86_FEATURE_PERFCTR_LLC KVM_X86_CPU_FEATURE(0x80000001, 0, ECX, 28)
#define X86_FEATURE_NX KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 20)
#define X86_FEATURE_GBPAGES KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 26)
#define X86_FEATURE_RDTSCP KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 27)
#define X86_FEATURE_LM KVM_X86_CPU_FEATURE(0x80000001, 0, EDX, 29)
#define X86_FEATURE_INVTSC KVM_X86_CPU_FEATURE(0x80000007, 0, EDX, 8)
#define X86_FEATURE_RDPRU KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 4)
#define X86_FEATURE_AMD_IBPB KVM_X86_CPU_FEATURE(0x80000008, 0, EBX, 12)
#define X86_FEATURE_NPT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 0)
#define X86_FEATURE_LBRV KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 1)
#define X86_FEATURE_NRIPS KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 3)
#define X86_FEATURE_TSCRATEMSR KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 4)
#define X86_FEATURE_PAUSEFILTER KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 10)
#define X86_FEATURE_PFTHRESHOLD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 12)
#define X86_FEATURE_V_VMSAVE_VMLOAD KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 15)
#define X86_FEATURE_VGIF KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 16)
#define X86_FEATURE_IDLE_HLT KVM_X86_CPU_FEATURE(0x8000000A, 0, EDX, 30)
#define X86_FEATURE_SEV KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 1)
#define X86_FEATURE_SEV_ES KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 3)
#define X86_FEATURE_SEV_SNP KVM_X86_CPU_FEATURE(0x8000001F, 0, EAX, 4)
#define X86_FEATURE_PERFMON_V2 KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 0)
#define X86_FEATURE_LBR_PMC_FREEZE KVM_X86_CPU_FEATURE(0x80000022, 0, EAX, 2)

/*
 * KVM defined paravirt features.
 */
#define X86_FEATURE_KVM_CLOCKSOURCE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 0)
#define X86_FEATURE_KVM_NOP_IO_DELAY KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 1)
#define X86_FEATURE_KVM_MMU_OP KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 2)
#define X86_FEATURE_KVM_CLOCKSOURCE2 KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 3)
#define X86_FEATURE_KVM_ASYNC_PF KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 4)
#define X86_FEATURE_KVM_STEAL_TIME KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 5)
#define X86_FEATURE_KVM_PV_EOI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 6)
#define X86_FEATURE_KVM_PV_UNHALT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 7)
/* Bit 8 apparently isn't used?!?! */
#define X86_FEATURE_KVM_PV_TLB_FLUSH KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 9)
#define X86_FEATURE_KVM_ASYNC_PF_VMEXIT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 10)
#define X86_FEATURE_KVM_PV_SEND_IPI KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 11)
#define X86_FEATURE_KVM_POLL_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 12)
#define X86_FEATURE_KVM_PV_SCHED_YIELD KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 13)
#define X86_FEATURE_KVM_ASYNC_PF_INT KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 14)
#define X86_FEATURE_KVM_MSI_EXT_DEST_ID KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 15)
#define X86_FEATURE_KVM_HC_MAP_GPA_RANGE KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 16)
#define X86_FEATURE_KVM_MIGRATION_CONTROL KVM_X86_CPU_FEATURE(0x40000001, 0, EAX, 17)

/*
 * Same idea as X86_FEATURE_XXX, but X86_PROPERTY_XXX retrieves a multi-bit
 * value/property as opposed to a single-bit feature. Again, pack the info
 * into a 64-bit value to pass by value with no overhead.
 */
struct kvm_x86_cpu_property {
        u32 function;
        u8 index;
        u8 reg;
        u8 lo_bit;
        u8 hi_bit;
};
#define KVM_X86_CPU_PROPERTY(fn, idx, gpr, low_bit, high_bit) \
({ \
        struct kvm_x86_cpu_property property = { \
                .function = fn, \
                .index = idx, \
                .reg = KVM_CPUID_##gpr, \
                .lo_bit = low_bit, \
                .hi_bit = high_bit, \
        }; \
\
        kvm_static_assert(low_bit < high_bit); \
        kvm_static_assert((fn & 0xc0000000) == 0 || \
                          (fn & 0xc0000000) == 0x40000000 || \
                          (fn & 0xc0000000) == 0x80000000 || \
                          (fn & 0xc0000000) == 0xc0000000); \
        kvm_static_assert(idx < BIT(sizeof(property.index) * BITS_PER_BYTE)); \
        property; \
})
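
/*
 * Example usage (illustrative sketch; this_cpu_has_p() and
 * this_cpu_property() are defined later in this header). A property reads a
 * multi-bit field, e.g. the CPU's physical address width from
 * CPUID.0x80000008:EAX[7:0]:
 *
 *	if (this_cpu_has_p(X86_PROPERTY_MAX_PHY_ADDR))
 *		pa_bits = this_cpu_property(X86_PROPERTY_MAX_PHY_ADDR);
 */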

#define X86_PROPERTY_MAX_BASIC_LEAF KVM_X86_CPU_PROPERTY(0, 0, EAX, 0, 31)
#define X86_PROPERTY_PMU_VERSION KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 0, 7)
#define X86_PROPERTY_PMU_NR_GP_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 8, 15)
#define X86_PROPERTY_PMU_GP_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 16, 23)
#define X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH KVM_X86_CPU_PROPERTY(0xa, 0, EAX, 24, 31)
#define X86_PROPERTY_PMU_EVENTS_MASK KVM_X86_CPU_PROPERTY(0xa, 0, EBX, 0, 12)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BITMASK KVM_X86_CPU_PROPERTY(0xa, 0, ECX, 0, 31)
#define X86_PROPERTY_PMU_NR_FIXED_COUNTERS KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 0, 4)
#define X86_PROPERTY_PMU_FIXED_COUNTERS_BIT_WIDTH KVM_X86_CPU_PROPERTY(0xa, 0, EDX, 5, 12)

#define X86_PROPERTY_SUPPORTED_XCR0_LO KVM_X86_CPU_PROPERTY(0xd, 0, EAX, 0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE_XCR0 KVM_X86_CPU_PROPERTY(0xd, 0, EBX, 0, 31)
#define X86_PROPERTY_XSTATE_MAX_SIZE KVM_X86_CPU_PROPERTY(0xd, 0, ECX, 0, 31)
#define X86_PROPERTY_SUPPORTED_XCR0_HI KVM_X86_CPU_PROPERTY(0xd, 0, EDX, 0, 31)

#define X86_PROPERTY_XSTATE_TILE_SIZE KVM_X86_CPU_PROPERTY(0xd, 18, EAX, 0, 31)
#define X86_PROPERTY_XSTATE_TILE_OFFSET KVM_X86_CPU_PROPERTY(0xd, 18, EBX, 0, 31)
#define X86_PROPERTY_AMX_MAX_PALETTE_TABLES KVM_X86_CPU_PROPERTY(0x1d, 0, EAX, 0, 31)
#define X86_PROPERTY_AMX_TOTAL_TILE_BYTES KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 0, 15)
#define X86_PROPERTY_AMX_BYTES_PER_TILE KVM_X86_CPU_PROPERTY(0x1d, 1, EAX, 16, 31)
#define X86_PROPERTY_AMX_BYTES_PER_ROW KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 0, 15)
#define X86_PROPERTY_AMX_NR_TILE_REGS KVM_X86_CPU_PROPERTY(0x1d, 1, EBX, 16, 31)
#define X86_PROPERTY_AMX_MAX_ROWS KVM_X86_CPU_PROPERTY(0x1d, 1, ECX, 0, 15)

#define X86_PROPERTY_MAX_KVM_LEAF KVM_X86_CPU_PROPERTY(0x40000000, 0, EAX, 0, 31)

#define X86_PROPERTY_MAX_EXT_LEAF KVM_X86_CPU_PROPERTY(0x80000000, 0, EAX, 0, 31)
#define X86_PROPERTY_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 0, 7)
#define X86_PROPERTY_MAX_VIRT_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 8, 15)
#define X86_PROPERTY_GUEST_MAX_PHY_ADDR KVM_X86_CPU_PROPERTY(0x80000008, 0, EAX, 16, 23)
#define X86_PROPERTY_SEV_C_BIT KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 0, 5)
#define X86_PROPERTY_PHYS_ADDR_REDUCTION KVM_X86_CPU_PROPERTY(0x8000001F, 0, EBX, 6, 11)
#define X86_PROPERTY_NR_PERFCTR_CORE KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 0, 3)
#define X86_PROPERTY_NR_PERFCTR_NB KVM_X86_CPU_PROPERTY(0x80000022, 0, EBX, 10, 15)

#define X86_PROPERTY_MAX_CENTAUR_LEAF KVM_X86_CPU_PROPERTY(0xC0000000, 0, EAX, 0, 31)

/*
 * Intel's architectural PMU events are bizarre. They have a "feature" bit
 * that indicates the feature is _not_ supported, and a property that states
 * the length of the bit mask of unsupported features. A feature is supported
 * if the size of the bit mask is larger than the "unavailable" bit, and said
 * bit is not set. Fixed counters also have bizarre enumeration, but inverted
 * from arch events for general purpose counters. Fixed counters are
 * supported if a feature flag is set **OR** the total number of fixed
 * counters is greater than the index of the counter.
 *
 * Wrap the events for general purpose and fixed counters to simplify checking
 * whether or not a given architectural event is supported.
 */
struct kvm_x86_pmu_feature {
        struct kvm_x86_cpu_feature f;
};
#define KVM_X86_PMU_FEATURE(__reg, __bit) \
({ \
        struct kvm_x86_pmu_feature feature = { \
                .f = KVM_X86_CPU_FEATURE(0xa, 0, __reg, __bit), \
        }; \
\
        kvm_static_assert(KVM_CPUID_##__reg == KVM_CPUID_EBX || \
                          KVM_CPUID_##__reg == KVM_CPUID_ECX); \
        feature; \
})

#define X86_PMU_FEATURE_CPU_CYCLES KVM_X86_PMU_FEATURE(EBX, 0)
#define X86_PMU_FEATURE_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 1)
#define X86_PMU_FEATURE_REFERENCE_CYCLES KVM_X86_PMU_FEATURE(EBX, 2)
#define X86_PMU_FEATURE_LLC_REFERENCES KVM_X86_PMU_FEATURE(EBX, 3)
#define X86_PMU_FEATURE_LLC_MISSES KVM_X86_PMU_FEATURE(EBX, 4)
#define X86_PMU_FEATURE_BRANCH_INSNS_RETIRED KVM_X86_PMU_FEATURE(EBX, 5)
#define X86_PMU_FEATURE_BRANCHES_MISPREDICTED KVM_X86_PMU_FEATURE(EBX, 6)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS KVM_X86_PMU_FEATURE(EBX, 7)
#define X86_PMU_FEATURE_TOPDOWN_BE_BOUND KVM_X86_PMU_FEATURE(EBX, 8)
#define X86_PMU_FEATURE_TOPDOWN_BAD_SPEC KVM_X86_PMU_FEATURE(EBX, 9)
#define X86_PMU_FEATURE_TOPDOWN_FE_BOUND KVM_X86_PMU_FEATURE(EBX, 10)
#define X86_PMU_FEATURE_TOPDOWN_RETIRING KVM_X86_PMU_FEATURE(EBX, 11)
#define X86_PMU_FEATURE_LBR_INSERTS KVM_X86_PMU_FEATURE(EBX, 12)

#define X86_PMU_FEATURE_INSNS_RETIRED_FIXED KVM_X86_PMU_FEATURE(ECX, 0)
#define X86_PMU_FEATURE_CPU_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 1)
#define X86_PMU_FEATURE_REFERENCE_TSC_CYCLES_FIXED KVM_X86_PMU_FEATURE(ECX, 2)
#define X86_PMU_FEATURE_TOPDOWN_SLOTS_FIXED KVM_X86_PMU_FEATURE(ECX, 3)
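
/*
 * Example of the inverted semantics described above (illustrative sketch;
 * this_pmu_has() is defined later in this header). For general purpose
 * events, the underlying CPUID bit being SET means the event is NOT
 * supported; the wrapper hides that inversion:
 *
 *	if (this_pmu_has(X86_PMU_FEATURE_INSNS_RETIRED))
 *		... program a GP counter to count instructions retired ...
 */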

static inline unsigned int x86_family(unsigned int eax)
{
        unsigned int x86;

        x86 = (eax >> 8) & 0xf;

        if (x86 == 0xf)
                x86 += (eax >> 20) & 0xff;

        return x86;
}

static inline unsigned int x86_model(unsigned int eax)
{
        return ((eax >> 12) & 0xf0) | ((eax >> 4) & 0x0f);
}
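
/*
 * Worked example: a raw FMS value of 0x000906ea decodes as family 0x6 and
 * model 0x9e (extended model 0x9 in bits 19:16, base model 0xe in bits 7:4).
 * The extended family adjustment in x86_family() only applies when the base
 * family is 0xf.
 */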

#define PHYSICAL_PAGE_MASK GENMASK_ULL(51, 12)

#define PAGE_SHIFT 12
#define PAGE_SIZE (1ULL << PAGE_SHIFT)
#define PAGE_MASK (~(PAGE_SIZE-1) & PHYSICAL_PAGE_MASK)

#define HUGEPAGE_SHIFT(x) (PAGE_SHIFT + (((x) - 1) * 9))
#define HUGEPAGE_SIZE(x) (1UL << HUGEPAGE_SHIFT(x))
#define HUGEPAGE_MASK(x) (~(HUGEPAGE_SIZE(x) - 1) & PHYSICAL_PAGE_MASK)

#define PTE_GET_PA(pte) ((pte) & PHYSICAL_PAGE_MASK)
#define PTE_GET_PFN(pte) (PTE_GET_PA(pte) >> PAGE_SHIFT)

/* General Registers in 64-Bit Mode */
struct gpr64_regs {
        u64 rax;
        u64 rcx;
        u64 rdx;
        u64 rbx;
        u64 rsp;
        u64 rbp;
        u64 rsi;
        u64 rdi;
        u64 r8;
        u64 r9;
        u64 r10;
        u64 r11;
        u64 r12;
        u64 r13;
        u64 r14;
        u64 r15;
};

struct desc64 {
        uint16_t limit0;
        uint16_t base0;
        unsigned base1:8, type:4, s:1, dpl:2, p:1;
        unsigned limit1:4, avl:1, l:1, db:1, g:1, base2:8;
        uint32_t base3;
        uint32_t zero1;
} __attribute__((packed));

struct desc_ptr {
        uint16_t size;
        uint64_t address;
} __attribute__((packed));

struct kvm_x86_state {
        struct kvm_xsave *xsave;
        struct kvm_vcpu_events events;
        struct kvm_mp_state mp_state;
        struct kvm_regs regs;
        struct kvm_xcrs xcrs;
        struct kvm_sregs sregs;
        struct kvm_debugregs debugregs;
        union {
                struct kvm_nested_state nested;
                char nested_[16384];
        };
        struct kvm_msrs msrs;
};

static inline uint64_t get_desc64_base(const struct desc64 *desc)
{
        return (uint64_t)desc->base3 << 32 |
               (uint64_t)desc->base2 << 24 |
               (uint64_t)desc->base1 << 16 |
               (uint64_t)desc->base0;
}

static inline uint64_t rdtsc(void)
{
        uint32_t eax, edx;
        uint64_t tsc_val;
        /*
         * The lfence is to wait (on Intel CPUs) until all previous
         * instructions have been executed. If software requires RDTSC to be
         * executed prior to execution of any subsequent instruction, it can
         * execute LFENCE immediately after RDTSC
         */
        __asm__ __volatile__("lfence; rdtsc; lfence" : "=a"(eax), "=d"(edx));
        tsc_val = ((uint64_t)edx) << 32 | eax;
        return tsc_val;
}

static inline uint64_t rdtscp(uint32_t *aux)
{
        uint32_t eax, edx;

        __asm__ __volatile__("rdtscp" : "=a"(eax), "=d"(edx), "=c"(*aux));
        return ((uint64_t)edx) << 32 | eax;
}

static inline uint64_t rdmsr(uint32_t msr)
{
        uint32_t a, d;

        __asm__ __volatile__("rdmsr" : "=a"(a), "=d"(d) : "c"(msr) : "memory");

        return a | ((uint64_t) d << 32);
}

static inline void wrmsr(uint32_t msr, uint64_t value)
{
        uint32_t a = value;
        uint32_t d = value >> 32;

        __asm__ __volatile__("wrmsr" :: "a"(a), "d"(d), "c"(msr) : "memory");
}

static inline uint16_t inw(uint16_t port)
{
        uint16_t tmp;

        __asm__ __volatile__("in %%dx, %%ax"
                             : /* output */ "=a" (tmp)
                             : /* input */ "d" (port));

        return tmp;
}

static inline uint16_t get_es(void)
{
        uint16_t es;

        __asm__ __volatile__("mov %%es, %[es]"
                             : /* output */ [es]"=rm"(es));
        return es;
}

static inline uint16_t get_cs(void)
{
        uint16_t cs;

        __asm__ __volatile__("mov %%cs, %[cs]"
                             : /* output */ [cs]"=rm"(cs));
        return cs;
}

static inline uint16_t get_ss(void)
{
        uint16_t ss;

        __asm__ __volatile__("mov %%ss, %[ss]"
                             : /* output */ [ss]"=rm"(ss));
        return ss;
}

static inline uint16_t get_ds(void)
{
        uint16_t ds;

        __asm__ __volatile__("mov %%ds, %[ds]"
                             : /* output */ [ds]"=rm"(ds));
        return ds;
}

static inline uint16_t get_fs(void)
{
        uint16_t fs;

        __asm__ __volatile__("mov %%fs, %[fs]"
                             : /* output */ [fs]"=rm"(fs));
        return fs;
}

static inline uint16_t get_gs(void)
{
        uint16_t gs;

        __asm__ __volatile__("mov %%gs, %[gs]"
                             : /* output */ [gs]"=rm"(gs));
        return gs;
}

static inline uint16_t get_tr(void)
{
        uint16_t tr;

        __asm__ __volatile__("str %[tr]"
                             : /* output */ [tr]"=rm"(tr));
        return tr;
}

static inline uint64_t get_cr0(void)
{
        uint64_t cr0;

        __asm__ __volatile__("mov %%cr0, %[cr0]"
                             : /* output */ [cr0]"=r"(cr0));
        return cr0;
}

static inline void set_cr0(uint64_t val)
{
        __asm__ __volatile__("mov %0, %%cr0" : : "r" (val) : "memory");
}

static inline uint64_t get_cr3(void)
{
        uint64_t cr3;

        __asm__ __volatile__("mov %%cr3, %[cr3]"
                             : /* output */ [cr3]"=r"(cr3));
        return cr3;
}

static inline void set_cr3(uint64_t val)
{
        __asm__ __volatile__("mov %0, %%cr3" : : "r" (val) : "memory");
}

static inline uint64_t get_cr4(void)
{
        uint64_t cr4;

        __asm__ __volatile__("mov %%cr4, %[cr4]"
                             : /* output */ [cr4]"=r"(cr4));
        return cr4;
}

static inline void set_cr4(uint64_t val)
{
        __asm__ __volatile__("mov %0, %%cr4" : : "r" (val) : "memory");
}

static inline uint64_t get_cr8(void)
{
        uint64_t cr8;

        __asm__ __volatile__("mov %%cr8, %[cr8]" : [cr8]"=r"(cr8));
        return cr8;
}

static inline void set_cr8(uint64_t val)
{
        __asm__ __volatile__("mov %0, %%cr8" : : "r" (val) : "memory");
}

static inline void set_idt(const struct desc_ptr *idt_desc)
{
        __asm__ __volatile__("lidt %0"::"m"(*idt_desc));
}

static inline u64 xgetbv(u32 index)
{
        u32 eax, edx;

        __asm__ __volatile__("xgetbv;"
                             : "=a" (eax), "=d" (edx)
                             : "c" (index));
        return eax | ((u64)edx << 32);
}

static inline void xsetbv(u32 index, u64 value)
{
        u32 eax = value;
        u32 edx = value >> 32;

        __asm__ __volatile__("xsetbv" :: "a" (eax), "d" (edx), "c" (index));
}

static inline void wrpkru(u32 pkru)
{
        /* Note, ECX and EDX are architecturally required to be '0'. */
        asm volatile(".byte 0x0f,0x01,0xef\n\t"
                     : : "a" (pkru), "c"(0), "d"(0));
}

static inline struct desc_ptr get_gdt(void)
{
        struct desc_ptr gdt;
        __asm__ __volatile__("sgdt %[gdt]"
                             : /* output */ [gdt]"=m"(gdt));
        return gdt;
}

static inline struct desc_ptr get_idt(void)
{
        struct desc_ptr idt;
        __asm__ __volatile__("sidt %[idt]"
                             : /* output */ [idt]"=m"(idt));
        return idt;
}

static inline void outl(uint16_t port, uint32_t value)
{
        __asm__ __volatile__("outl %%eax, %%dx" : : "d"(port), "a"(value));
}

static inline void __cpuid(uint32_t function, uint32_t index,
                           uint32_t *eax, uint32_t *ebx,
                           uint32_t *ecx, uint32_t *edx)
{
        *eax = function;
        *ecx = index;

        asm volatile("cpuid"
                     : "=a" (*eax),
                       "=b" (*ebx),
                       "=c" (*ecx),
                       "=d" (*edx)
                     : "0" (*eax), "2" (*ecx)
                     : "memory");
}

static inline void cpuid(uint32_t function,
                         uint32_t *eax, uint32_t *ebx,
                         uint32_t *ecx, uint32_t *edx)
{
        return __cpuid(function, 0, eax, ebx, ecx, edx);
}

static inline uint32_t this_cpu_fms(void)
{
        uint32_t eax, ebx, ecx, edx;

        cpuid(1, &eax, &ebx, &ecx, &edx);
        return eax;
}

static inline uint32_t this_cpu_family(void)
{
        return x86_family(this_cpu_fms());
}

static inline uint32_t this_cpu_model(void)
{
        return x86_model(this_cpu_fms());
}

static inline bool this_cpu_vendor_string_is(const char *vendor)
{
        const uint32_t *chunk = (const uint32_t *)vendor;
        uint32_t eax, ebx, ecx, edx;

        cpuid(0, &eax, &ebx, &ecx, &edx);
        return (ebx == chunk[0] && edx == chunk[1] && ecx == chunk[2]);
}

static inline bool this_cpu_is_intel(void)
{
        return this_cpu_vendor_string_is("GenuineIntel");
}

/*
 * Exclude early K5 samples with a vendor string of "AMDisbetter!"
 */
static inline bool this_cpu_is_amd(void)
{
        return this_cpu_vendor_string_is("AuthenticAMD");
}

static inline uint32_t __this_cpu_has(uint32_t function, uint32_t index,
                                      uint8_t reg, uint8_t lo, uint8_t hi)
{
        uint32_t gprs[4];

        __cpuid(function, index,
                &gprs[KVM_CPUID_EAX], &gprs[KVM_CPUID_EBX],
                &gprs[KVM_CPUID_ECX], &gprs[KVM_CPUID_EDX]);

        return (gprs[reg] & GENMASK(hi, lo)) >> lo;
}

static inline bool this_cpu_has(struct kvm_x86_cpu_feature feature)
{
        return __this_cpu_has(feature.function, feature.index,
                              feature.reg, feature.bit, feature.bit);
}

static inline uint32_t this_cpu_property(struct kvm_x86_cpu_property property)
{
        return __this_cpu_has(property.function, property.index,
                              property.reg, property.lo_bit, property.hi_bit);
}

static __always_inline bool this_cpu_has_p(struct kvm_x86_cpu_property property)
{
        uint32_t max_leaf;

        switch (property.function & 0xc0000000) {
        case 0:
                max_leaf = this_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
                break;
        case 0x40000000:
                max_leaf = this_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
                break;
        case 0x80000000:
                max_leaf = this_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
                break;
        case 0xc0000000:
                max_leaf = this_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
        }
        return max_leaf >= property.function;
}

static inline bool this_pmu_has(struct kvm_x86_pmu_feature feature)
{
        uint32_t nr_bits;

        if (feature.f.reg == KVM_CPUID_EBX) {
                nr_bits = this_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
                return nr_bits > feature.f.bit && !this_cpu_has(feature.f);
        }

        GUEST_ASSERT(feature.f.reg == KVM_CPUID_ECX);
        nr_bits = this_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
        return nr_bits > feature.f.bit || this_cpu_has(feature.f);
}

static __always_inline uint64_t this_cpu_supported_xcr0(void)
{
        if (!this_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
                return 0;

        return this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
               ((uint64_t)this_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
}
typedef u32 __attribute__((vector_size(16))) sse128_t;
#define __sse128_u union { sse128_t vec; u64 as_u64[2]; u32 as_u32[4]; }
#define sse128_lo(x) ({ __sse128_u t; t.vec = x; t.as_u64[0]; })
#define sse128_hi(x) ({ __sse128_u t; t.vec = x; t.as_u64[1]; })

static inline void read_sse_reg(int reg, sse128_t *data)
{
        switch (reg) {
        case 0:
                asm("movdqa %%xmm0, %0" : "=m"(*data));
                break;
        case 1:
                asm("movdqa %%xmm1, %0" : "=m"(*data));
                break;
        case 2:
                asm("movdqa %%xmm2, %0" : "=m"(*data));
                break;
        case 3:
                asm("movdqa %%xmm3, %0" : "=m"(*data));
                break;
        case 4:
                asm("movdqa %%xmm4, %0" : "=m"(*data));
                break;
        case 5:
                asm("movdqa %%xmm5, %0" : "=m"(*data));
                break;
        case 6:
                asm("movdqa %%xmm6, %0" : "=m"(*data));
                break;
        case 7:
                asm("movdqa %%xmm7, %0" : "=m"(*data));
                break;
        default:
                BUG();
        }
}

static inline void write_sse_reg(int reg, const sse128_t *data)
{
        switch (reg) {
        case 0:
                asm("movdqa %0, %%xmm0" : : "m"(*data));
                break;
        case 1:
                asm("movdqa %0, %%xmm1" : : "m"(*data));
                break;
        case 2:
                asm("movdqa %0, %%xmm2" : : "m"(*data));
                break;
        case 3:
                asm("movdqa %0, %%xmm3" : : "m"(*data));
                break;
        case 4:
                asm("movdqa %0, %%xmm4" : : "m"(*data));
                break;
        case 5:
                asm("movdqa %0, %%xmm5" : : "m"(*data));
                break;
        case 6:
                asm("movdqa %0, %%xmm6" : : "m"(*data));
                break;
        case 7:
                asm("movdqa %0, %%xmm7" : : "m"(*data));
                break;
        default:
                BUG();
        }
}

static inline void cpu_relax(void)
{
        asm volatile("rep; nop" ::: "memory");
}

static inline void udelay(unsigned long usec)
{
        uint64_t start, now, cycles;

        GUEST_ASSERT(guest_tsc_khz);
        cycles = guest_tsc_khz / 1000 * usec;

        /*
         * Deliberately don't PAUSE, a.k.a. cpu_relax(), so that the delay is
         * as accurate as possible, e.g. doesn't trigger PAUSE-Loop VM-Exits.
         */
        start = rdtsc();
        do {
                now = rdtsc();
        } while (now - start < cycles);
}
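
/*
 * Example usage (illustrative; "flag" is a hypothetical guest variable set
 * by the host or another vCPU, read via the selftests' READ_ONCE()): poll
 * with a fixed delay between reads.
 *
 *	while (!READ_ONCE(flag))
 *		udelay(100);
 */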

#define ud2() \
        __asm__ __volatile__( \
                "ud2\n" \
        )

#define hlt() \
        __asm__ __volatile__( \
                "hlt\n" \
        )

struct kvm_x86_state *vcpu_save_state(struct kvm_vcpu *vcpu);
void vcpu_load_state(struct kvm_vcpu *vcpu, struct kvm_x86_state *state);
void kvm_x86_state_cleanup(struct kvm_x86_state *state);

const struct kvm_msr_list *kvm_get_msr_index_list(void);
const struct kvm_msr_list *kvm_get_feature_msr_index_list(void);
bool kvm_msr_is_in_save_restore_list(uint32_t msr_index);
uint64_t kvm_get_feature_msr(uint64_t msr_index);

static inline void vcpu_msrs_get(struct kvm_vcpu *vcpu,
                                 struct kvm_msrs *msrs)
{
        int r = __vcpu_ioctl(vcpu, KVM_GET_MSRS, msrs);

        TEST_ASSERT(r == msrs->nmsrs,
                    "KVM_GET_MSRS failed, r: %i (failed on MSR %x)",
                    r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
}

static inline void vcpu_msrs_set(struct kvm_vcpu *vcpu, struct kvm_msrs *msrs)
{
        int r = __vcpu_ioctl(vcpu, KVM_SET_MSRS, msrs);

        TEST_ASSERT(r == msrs->nmsrs,
                    "KVM_SET_MSRS failed, r: %i (failed on MSR %x)",
                    r, r < 0 || r >= msrs->nmsrs ? -1 : msrs->entries[r].index);
}

static inline void vcpu_debugregs_get(struct kvm_vcpu *vcpu,
                                      struct kvm_debugregs *debugregs)
{
        vcpu_ioctl(vcpu, KVM_GET_DEBUGREGS, debugregs);
}

static inline void vcpu_debugregs_set(struct kvm_vcpu *vcpu,
                                      struct kvm_debugregs *debugregs)
{
        vcpu_ioctl(vcpu, KVM_SET_DEBUGREGS, debugregs);
}

static inline void vcpu_xsave_get(struct kvm_vcpu *vcpu,
                                  struct kvm_xsave *xsave)
{
        vcpu_ioctl(vcpu, KVM_GET_XSAVE, xsave);
}

static inline void vcpu_xsave2_get(struct kvm_vcpu *vcpu,
                                   struct kvm_xsave *xsave)
{
        vcpu_ioctl(vcpu, KVM_GET_XSAVE2, xsave);
}

static inline void vcpu_xsave_set(struct kvm_vcpu *vcpu,
                                  struct kvm_xsave *xsave)
{
        vcpu_ioctl(vcpu, KVM_SET_XSAVE, xsave);
}

static inline void vcpu_xcrs_get(struct kvm_vcpu *vcpu,
                                 struct kvm_xcrs *xcrs)
{
        vcpu_ioctl(vcpu, KVM_GET_XCRS, xcrs);
}

static inline void vcpu_xcrs_set(struct kvm_vcpu *vcpu, struct kvm_xcrs *xcrs)
{
        vcpu_ioctl(vcpu, KVM_SET_XCRS, xcrs);
}

const struct kvm_cpuid_entry2 *get_cpuid_entry(const struct kvm_cpuid2 *cpuid,
                                               uint32_t function, uint32_t index);
const struct kvm_cpuid2 *kvm_get_supported_cpuid(void);

static inline uint32_t kvm_cpu_fms(void)
{
        return get_cpuid_entry(kvm_get_supported_cpuid(), 0x1, 0)->eax;
}

static inline uint32_t kvm_cpu_family(void)
{
        return x86_family(kvm_cpu_fms());
}

static inline uint32_t kvm_cpu_model(void)
{
        return x86_model(kvm_cpu_fms());
}

bool kvm_cpuid_has(const struct kvm_cpuid2 *cpuid,
                   struct kvm_x86_cpu_feature feature);

static inline bool kvm_cpu_has(struct kvm_x86_cpu_feature feature)
{
        return kvm_cpuid_has(kvm_get_supported_cpuid(), feature);
}

uint32_t kvm_cpuid_property(const struct kvm_cpuid2 *cpuid,
                            struct kvm_x86_cpu_property property);

static inline uint32_t kvm_cpu_property(struct kvm_x86_cpu_property property)
{
        return kvm_cpuid_property(kvm_get_supported_cpuid(), property);
}

static __always_inline bool kvm_cpu_has_p(struct kvm_x86_cpu_property property)
{
        uint32_t max_leaf;

        switch (property.function & 0xc0000000) {
        case 0:
                max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_BASIC_LEAF);
                break;
        case 0x40000000:
                max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_KVM_LEAF);
                break;
        case 0x80000000:
                max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_EXT_LEAF);
                break;
        case 0xc0000000:
                max_leaf = kvm_cpu_property(X86_PROPERTY_MAX_CENTAUR_LEAF);
        }
        return max_leaf >= property.function;
}

static inline bool kvm_pmu_has(struct kvm_x86_pmu_feature feature)
{
        uint32_t nr_bits;

        if (feature.f.reg == KVM_CPUID_EBX) {
                nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_EBX_BIT_VECTOR_LENGTH);
                return nr_bits > feature.f.bit && !kvm_cpu_has(feature.f);
        }

        TEST_ASSERT_EQ(feature.f.reg, KVM_CPUID_ECX);
        nr_bits = kvm_cpu_property(X86_PROPERTY_PMU_NR_FIXED_COUNTERS);
        return nr_bits > feature.f.bit || kvm_cpu_has(feature.f);
}

static __always_inline uint64_t kvm_cpu_supported_xcr0(void)
{
        if (!kvm_cpu_has_p(X86_PROPERTY_SUPPORTED_XCR0_LO))
                return 0;

        return kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_LO) |
               ((uint64_t)kvm_cpu_property(X86_PROPERTY_SUPPORTED_XCR0_HI) << 32);
}

static inline size_t kvm_cpuid2_size(int nr_entries)
{
        return sizeof(struct kvm_cpuid2) +
               sizeof(struct kvm_cpuid_entry2) * nr_entries;
}

/*
 * Allocate a "struct kvm_cpuid2" instance, with the 0-length array of
 * entries sized to hold @nr_entries. The caller is responsible for freeing
 * the struct.
 */
static inline struct kvm_cpuid2 *allocate_kvm_cpuid2(int nr_entries)
{
        struct kvm_cpuid2 *cpuid;

        cpuid = malloc(kvm_cpuid2_size(nr_entries));
        TEST_ASSERT(cpuid, "-ENOMEM when allocating kvm_cpuid2");

        cpuid->nent = nr_entries;

        return cpuid;
}

void vcpu_init_cpuid(struct kvm_vcpu *vcpu, const struct kvm_cpuid2 *cpuid);

static inline void vcpu_get_cpuid(struct kvm_vcpu *vcpu)
{
        vcpu_ioctl(vcpu, KVM_GET_CPUID2, vcpu->cpuid);
}

static inline struct kvm_cpuid_entry2 *__vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
                                                              uint32_t function,
                                                              uint32_t index)
{
        TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first (or equivalent)");

        vcpu_get_cpuid(vcpu);

        return (struct kvm_cpuid_entry2 *)get_cpuid_entry(vcpu->cpuid,
                                                          function, index);
}

static inline struct kvm_cpuid_entry2 *vcpu_get_cpuid_entry(struct kvm_vcpu *vcpu,
                                                            uint32_t function)
{
        return __vcpu_get_cpuid_entry(vcpu, function, 0);
}

static inline int __vcpu_set_cpuid(struct kvm_vcpu *vcpu)
{
        int r;

        TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
        r = __vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);
        if (r)
                return r;

        /* On success, refresh the cache to pick up adjustments made by KVM. */
        vcpu_get_cpuid(vcpu);
        return 0;
}

static inline void vcpu_set_cpuid(struct kvm_vcpu *vcpu)
{
        TEST_ASSERT(vcpu->cpuid, "Must do vcpu_init_cpuid() first");
        vcpu_ioctl(vcpu, KVM_SET_CPUID2, vcpu->cpuid);

        /* Refresh the cache to pick up adjustments made by KVM. */
        vcpu_get_cpuid(vcpu);
}

void vcpu_set_cpuid_property(struct kvm_vcpu *vcpu,
                             struct kvm_x86_cpu_property property,
                             uint32_t value);
void vcpu_set_cpuid_maxphyaddr(struct kvm_vcpu *vcpu, uint8_t maxphyaddr);

void vcpu_clear_cpuid_entry(struct kvm_vcpu *vcpu, uint32_t function);

static inline bool vcpu_cpuid_has(struct kvm_vcpu *vcpu,
                                  struct kvm_x86_cpu_feature feature)
{
        struct kvm_cpuid_entry2 *entry;

        entry = __vcpu_get_cpuid_entry(vcpu, feature.function, feature.index);
        return *((&entry->eax) + feature.reg) & BIT(feature.bit);
}

void vcpu_set_or_clear_cpuid_feature(struct kvm_vcpu *vcpu,
                                     struct kvm_x86_cpu_feature feature,
                                     bool set);

static inline void vcpu_set_cpuid_feature(struct kvm_vcpu *vcpu,
                                          struct kvm_x86_cpu_feature feature)
{
        vcpu_set_or_clear_cpuid_feature(vcpu, feature, true);
}

static inline void vcpu_clear_cpuid_feature(struct kvm_vcpu *vcpu,
                                            struct kvm_x86_cpu_feature feature)
{
        vcpu_set_or_clear_cpuid_feature(vcpu, feature, false);
}

uint64_t vcpu_get_msr(struct kvm_vcpu *vcpu, uint64_t msr_index);
int _vcpu_set_msr(struct kvm_vcpu *vcpu, uint64_t msr_index, uint64_t msr_value);

/*
 * Assert on an MSR access(es) and pretty print the MSR name when possible.
 * Note, the caller provides the stringified name so that the name of the
 * macro is printed, not the value the macro resolves to (due to macro
 * expansion).
 */
#define TEST_ASSERT_MSR(cond, fmt, msr, str, args...) \
do { \
        if (__builtin_constant_p(msr)) { \
                TEST_ASSERT(cond, fmt, str, args); \
        } else if (!(cond)) { \
                char buf[16]; \
\
                snprintf(buf, sizeof(buf), "MSR 0x%x", msr); \
                TEST_ASSERT(cond, fmt, buf, args); \
        } \
} while (0)

/*
 * Returns true if KVM should return the last written value when reading an MSR
 * from userspace, e.g. the MSR isn't a command MSR, doesn't emulate state that
 * is changing, etc. This is NOT an exhaustive list! The intent is to filter
 * out MSRs that are not durable _and_ that a selftest wants to write.
 */
static inline bool is_durable_msr(uint32_t msr)
{
        return msr != MSR_IA32_TSC;
}

#define vcpu_set_msr(vcpu, msr, val) \
do { \
        uint64_t r, v = val; \
\
        TEST_ASSERT_MSR(_vcpu_set_msr(vcpu, msr, v) == 1, \
                        "KVM_SET_MSRS failed on %s, value = 0x%lx", msr, #msr, v); \
        if (!is_durable_msr(msr)) \
                break; \
        r = vcpu_get_msr(vcpu, msr); \
        TEST_ASSERT_MSR(r == v, "Set %s to '0x%lx', got back '0x%lx'", msr, #msr, v, r); \
} while (0)
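
/*
 * Example usage (illustrative): thanks to the stringification above, a
 * failure report names the MSR macro, e.g. "MSR_IA32_MISC_ENABLE", rather
 * than its numeric value:
 *
 *	vcpu_set_msr(vcpu, MSR_IA32_MISC_ENABLE,
 *		     vcpu_get_msr(vcpu, MSR_IA32_MISC_ENABLE));
 */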

void kvm_get_cpu_address_width(unsigned int *pa_bits, unsigned int *va_bits);
void kvm_init_vm_address_properties(struct kvm_vm *vm);

struct ex_regs {
        uint64_t rax, rcx, rdx, rbx;
        uint64_t rbp, rsi, rdi;
        uint64_t r8, r9, r10, r11;
        uint64_t r12, r13, r14, r15;
        uint64_t vector;
        uint64_t error_code;
        uint64_t rip;
        uint64_t cs;
        uint64_t rflags;
};

struct idt_entry {
        uint16_t offset0;
        uint16_t selector;
        uint16_t ist : 3;
        uint16_t : 5;
        uint16_t type : 4;
        uint16_t : 1;
        uint16_t dpl : 2;
        uint16_t p : 1;
        uint16_t offset1;
        uint32_t offset2;
        uint32_t reserved;
};

void vm_install_exception_handler(struct kvm_vm *vm, int vector,
                                  void (*handler)(struct ex_regs *));

/*
 * Exception fixup morphs #DE to an arbitrary magic vector so that '0' can be
 * used to signal "no exception".
 */
#define KVM_MAGIC_DE_VECTOR 0xff

/* If a toddler were to say "abracadabra". */
#define KVM_EXCEPTION_MAGIC 0xabacadabaULL

/*
 * KVM selftest exception fixup uses registers to coordinate with the exception
 * handler, versus the kernel's in-memory tables and KVM-Unit-Tests's in-memory
 * per-CPU data. Using only registers avoids having to map memory into the
 * guest, doesn't require a valid, stable GS.base, and reduces the risk of
 * recursive faults when accessing memory in the handler. The downside to
 * using registers is that it restricts what registers can be used by the
 * actual instruction. But, selftests are 64-bit only, making register
 * pressure a minor concern. Use r9-r11 as they are volatile, i.e. don't need
 * to be saved by the callee, and except for r11 are not implicit parameters
 * to any instructions. Ideally, fixup would use r8-r10 and thus avoid
 * implicit parameters entirely, but Hyper-V's hypercall ABI uses r8 and
 * testing Hyper-V is higher priority than testing non-faulting
 * SYSCALL/SYSRET.
 *
 * Note, the fixup handler deliberately does not handle #DE, i.e. the vector
 * is guaranteed to be non-zero on fault.
 *
 * REGISTER INPUTS:
 * r9 = MAGIC
 * r10 = RIP
 * r11 = new RIP on fault
 *
 * REGISTER OUTPUTS:
 * r9 = exception vector (non-zero)
 * r10 = error code
 */
#define __KVM_ASM_SAFE(insn, fep) \
        "mov $" __stringify(KVM_EXCEPTION_MAGIC) ", %%r9\n\t" \
        "lea 1f(%%rip), %%r10\n\t" \
        "lea 2f(%%rip), %%r11\n\t" \
        fep "1: " insn "\n\t" \
        "xor %%r9, %%r9\n\t" \
        "2:\n\t" \
        "mov %%r9b, %[vector]\n\t" \
        "mov %%r10, %[error_code]\n\t"

#define KVM_ASM_SAFE(insn) __KVM_ASM_SAFE(insn, "")
#define KVM_ASM_SAFE_FEP(insn) __KVM_ASM_SAFE(insn, KVM_FEP)

#define KVM_ASM_SAFE_OUTPUTS(v, ec) [vector] "=qm"(v), [error_code] "=rm"(ec)
#define KVM_ASM_SAFE_CLOBBERS "r9", "r10", "r11"

#define kvm_asm_safe(insn, inputs...) \
({ \
        uint64_t ign_error_code; \
        uint8_t vector; \
\
        asm volatile(KVM_ASM_SAFE(insn) \
                     : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
                     : inputs \
                     : KVM_ASM_SAFE_CLOBBERS); \
        vector; \
})
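
/*
 * Example usage (illustrative sketch; GP_VECTOR is assumed to come from the
 * selftests' common vector definitions): execute an instruction that is
 * expected to fault and assert on the reported vector, where '0' means no
 * exception was taken. Loading CR4 with all bits set #GPs on the reserved
 * bits:
 *
 *	uint8_t vector = kvm_asm_safe("mov %[cr4], %%cr4", [cr4]"r"(-1ul));
 *
 *	GUEST_ASSERT(vector == GP_VECTOR);
 */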

#define kvm_asm_safe_ec(insn, error_code, inputs...) \
({ \
        uint8_t vector; \
\
        asm volatile(KVM_ASM_SAFE(insn) \
                     : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
                     : inputs \
                     : KVM_ASM_SAFE_CLOBBERS); \
        vector; \
})

#define kvm_asm_safe_fep(insn, inputs...) \
({ \
        uint64_t ign_error_code; \
        uint8_t vector; \
\
        asm volatile(KVM_ASM_SAFE_FEP(insn) \
                     : KVM_ASM_SAFE_OUTPUTS(vector, ign_error_code) \
                     : inputs \
                     : KVM_ASM_SAFE_CLOBBERS); \
        vector; \
})

#define kvm_asm_safe_ec_fep(insn, error_code, inputs...) \
({ \
        uint8_t vector; \
\
        asm volatile(KVM_ASM_SAFE_FEP(insn) \
                     : KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
                     : inputs \
                     : KVM_ASM_SAFE_CLOBBERS); \
        vector; \
})

#define BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \
static inline uint8_t insn##_safe##_fep(uint32_t idx, uint64_t *val) \
{ \
        uint64_t error_code; \
        uint8_t vector; \
        uint32_t a, d; \
\
        asm volatile(KVM_ASM_SAFE##_FEP(#insn) \
                     : "=a"(a), "=d"(d), \
                       KVM_ASM_SAFE_OUTPUTS(vector, error_code) \
                     : "c"(idx) \
                     : KVM_ASM_SAFE_CLOBBERS); \
\
        *val = (uint64_t)a | ((uint64_t)d << 32); \
        return vector; \
}
/*
 * Generate {insn}_safe() and {insn}_safe_fep() helpers for instructions that
 * use ECX as an input index, and EDX:EAX as a 64-bit output.
 */
#define BUILD_READ_U64_SAFE_HELPERS(insn) \
        BUILD_READ_U64_SAFE_HELPER(insn, , ) \
        BUILD_READ_U64_SAFE_HELPER(insn, _fep, _FEP) \

BUILD_READ_U64_SAFE_HELPERS(rdmsr)
BUILD_READ_U64_SAFE_HELPERS(rdpmc)
BUILD_READ_U64_SAFE_HELPERS(xgetbv)
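
/*
 * Example usage (illustrative): the generated rdmsr_safe() returns the
 * exception vector ('0' on success) and writes the 64-bit result through
 * the pointer:
 *
 *	uint64_t efer;
 *
 *	GUEST_ASSERT(!rdmsr_safe(MSR_EFER, &efer));
 */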

static inline uint8_t wrmsr_safe(uint32_t msr, uint64_t val)
{
        return kvm_asm_safe("wrmsr", "a"(val & -1u), "d"(val >> 32), "c"(msr));
}

static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value)
{
        u32 eax = value;
        u32 edx = value >> 32;

        return kvm_asm_safe("xsetbv", "a" (eax), "d" (edx), "c" (index));
}

bool kvm_is_tdp_enabled(void);

static inline bool get_kvm_intel_param_bool(const char *param)
{
        return kvm_get_module_param_bool("kvm_intel", param);
}

static inline bool get_kvm_amd_param_bool(const char *param)
{
        return kvm_get_module_param_bool("kvm_amd", param);
}

static inline int get_kvm_intel_param_integer(const char *param)
{
        return kvm_get_module_param_integer("kvm_intel", param);
}

static inline int get_kvm_amd_param_integer(const char *param)
{
        return kvm_get_module_param_integer("kvm_amd", param);
}

static inline bool kvm_is_pmu_enabled(void)
{
        return get_kvm_param_bool("enable_pmu");
}

static inline bool kvm_is_forced_emulation_enabled(void)
{
        return !!get_kvm_param_integer("force_emulation_prefix");
}

static inline bool kvm_is_unrestricted_guest_enabled(void)
{
        return get_kvm_intel_param_bool("unrestricted_guest");
}

static inline bool kvm_is_ignore_msrs(void)
{
        return get_kvm_param_bool("ignore_msrs");
}

uint64_t *vm_get_pte(struct kvm_vm *vm, uint64_t vaddr);

uint64_t kvm_hypercall(uint64_t nr, uint64_t a0, uint64_t a1, uint64_t a2,
                       uint64_t a3);
uint64_t __xen_hypercall(uint64_t nr, uint64_t a0, void *a1);
void xen_hypercall(uint64_t nr, uint64_t a0, void *a1);

static inline uint64_t __kvm_hypercall_map_gpa_range(uint64_t gpa,
                                                     uint64_t size, uint64_t flags)
{
        return kvm_hypercall(KVM_HC_MAP_GPA_RANGE, gpa, size >> PAGE_SHIFT, flags, 0);
}

static inline void kvm_hypercall_map_gpa_range(uint64_t gpa, uint64_t size,
                                               uint64_t flags)
{
        uint64_t ret = __kvm_hypercall_map_gpa_range(gpa, size, flags);

        GUEST_ASSERT(!ret);
}

/*
 * Execute HLT in an STI interrupt shadow to ensure that a pending IRQ that's
 * intended to be a wake event arrives *after* HLT is executed. Modern CPUs,
 * except for a few oddballs that KVM is unlikely to run on, block IRQs for one
 * instruction after STI, *if* RFLAGS.IF=0 before STI. Note, Intel CPUs may
 * block other events beyond regular IRQs, e.g. may block NMIs and SMIs too.
 */
static inline void safe_halt(void)
{
        asm volatile("sti; hlt");
}

/*
 * Enable interrupts and ensure that interrupts are evaluated upon return from
 * this function, i.e. execute a nop to consume the STI interrupt shadow.
 */
static inline void sti_nop(void)
{
        asm volatile ("sti; nop");
}

/*
 * Enable interrupts for one instruction (nop), to allow the CPU to process all
 * interrupts that are already pending.
 */
static inline void sti_nop_cli(void)
{
        asm volatile ("sti; nop; cli");
}

static inline void sti(void)
{
        asm volatile("sti");
}

static inline void cli(void)
{
        asm volatile ("cli");
}

void __vm_xsave_require_permission(uint64_t xfeature, const char *name);

#define vm_xsave_require_permission(xfeature) \
        __vm_xsave_require_permission(xfeature, #xfeature)

enum pg_level {
        PG_LEVEL_NONE,
        PG_LEVEL_4K,
        PG_LEVEL_2M,
        PG_LEVEL_1G,
        PG_LEVEL_512G,
        PG_LEVEL_256T
};

#define PG_LEVEL_SHIFT(_level) ((_level - 1) * 9 + 12)
#define PG_LEVEL_SIZE(_level) (1ull << PG_LEVEL_SHIFT(_level))

#define PG_SIZE_4K PG_LEVEL_SIZE(PG_LEVEL_4K)
#define PG_SIZE_2M PG_LEVEL_SIZE(PG_LEVEL_2M)
#define PG_SIZE_1G PG_LEVEL_SIZE(PG_LEVEL_1G)

#define PTE_PRESENT_MASK(mmu) ((mmu)->arch.pte_masks.present)
#define PTE_WRITABLE_MASK(mmu) ((mmu)->arch.pte_masks.writable)
#define PTE_USER_MASK(mmu) ((mmu)->arch.pte_masks.user)
#define PTE_READABLE_MASK(mmu) ((mmu)->arch.pte_masks.readable)
#define PTE_EXECUTABLE_MASK(mmu) ((mmu)->arch.pte_masks.executable)
#define PTE_ACCESSED_MASK(mmu) ((mmu)->arch.pte_masks.accessed)
#define PTE_DIRTY_MASK(mmu) ((mmu)->arch.pte_masks.dirty)
#define PTE_HUGE_MASK(mmu) ((mmu)->arch.pte_masks.huge)
#define PTE_NX_MASK(mmu) ((mmu)->arch.pte_masks.nx)
#define PTE_C_BIT_MASK(mmu) ((mmu)->arch.pte_masks.c)
#define PTE_S_BIT_MASK(mmu) ((mmu)->arch.pte_masks.s)
#define PTE_ALWAYS_SET_MASK(mmu) ((mmu)->arch.pte_masks.always_set)

/*
 * For PTEs without a PRESENT bit (i.e. EPT entries), treat the PTE as present
 * if it's executable or readable, as EPT supports execute-only PTEs, but not
 * write-only PTEs.
 */
#define is_present_pte(mmu, pte) \
        (PTE_PRESENT_MASK(mmu) ? \
         !!(*(pte) & PTE_PRESENT_MASK(mmu)) : \
         !!(*(pte) & (PTE_READABLE_MASK(mmu) | PTE_EXECUTABLE_MASK(mmu))))
#define is_executable_pte(mmu, pte) \
        ((*(pte) & (PTE_EXECUTABLE_MASK(mmu) | PTE_NX_MASK(mmu))) == PTE_EXECUTABLE_MASK(mmu))
#define is_writable_pte(mmu, pte) (!!(*(pte) & PTE_WRITABLE_MASK(mmu)))
#define is_user_pte(mmu, pte) (!!(*(pte) & PTE_USER_MASK(mmu)))
#define is_accessed_pte(mmu, pte) (!!(*(pte) & PTE_ACCESSED_MASK(mmu)))
#define is_dirty_pte(mmu, pte) (!!(*(pte) & PTE_DIRTY_MASK(mmu)))
#define is_huge_pte(mmu, pte) (!!(*(pte) & PTE_HUGE_MASK(mmu)))
#define is_nx_pte(mmu, pte) (!is_executable_pte(mmu, pte))

void tdp_mmu_init(struct kvm_vm *vm, int pgtable_levels,
                  struct pte_masks *pte_masks);

void __virt_pg_map(struct kvm_vm *vm, struct kvm_mmu *mmu, uint64_t vaddr,
                   uint64_t paddr, int level);
void virt_map_level(struct kvm_vm *vm, uint64_t vaddr, uint64_t paddr,
                    uint64_t nr_bytes, int level);

void vm_enable_tdp(struct kvm_vm *vm);
bool kvm_cpu_has_tdp(void);
void tdp_map(struct kvm_vm *vm, uint64_t nested_paddr, uint64_t paddr, uint64_t size);
void tdp_identity_map_default_memslots(struct kvm_vm *vm);
void tdp_identity_map_1g(struct kvm_vm *vm, uint64_t addr, uint64_t size);
uint64_t *tdp_get_pte(struct kvm_vm *vm, uint64_t l2_gpa);

/*
 * Basic CPU control in CR0
 */
#define X86_CR0_PE (1UL<<0) /* Protection Enable */
#define X86_CR0_MP (1UL<<1) /* Monitor Coprocessor */
#define X86_CR0_EM (1UL<<2) /* Emulation */
#define X86_CR0_TS (1UL<<3) /* Task Switched */
#define X86_CR0_ET (1UL<<4) /* Extension Type */
#define X86_CR0_NE (1UL<<5) /* Numeric Error */
#define X86_CR0_WP (1UL<<16) /* Write Protect */
#define X86_CR0_AM (1UL<<18) /* Alignment Mask */
#define X86_CR0_NW (1UL<<29) /* Not Write-through */
#define X86_CR0_CD (1UL<<30) /* Cache Disable */
#define X86_CR0_PG (1UL<<31) /* Paging */

#define PFERR_PRESENT_BIT 0
#define PFERR_WRITE_BIT 1
#define PFERR_USER_BIT 2
#define PFERR_RSVD_BIT 3
#define PFERR_FETCH_BIT 4
#define PFERR_PK_BIT 5
#define PFERR_SGX_BIT 15
#define PFERR_GUEST_FINAL_BIT 32
#define PFERR_GUEST_PAGE_BIT 33
#define PFERR_IMPLICIT_ACCESS_BIT 48

#define PFERR_PRESENT_MASK BIT(PFERR_PRESENT_BIT)
#define PFERR_WRITE_MASK BIT(PFERR_WRITE_BIT)
#define PFERR_USER_MASK BIT(PFERR_USER_BIT)
#define PFERR_RSVD_MASK BIT(PFERR_RSVD_BIT)
#define PFERR_FETCH_MASK BIT(PFERR_FETCH_BIT)
#define PFERR_PK_MASK BIT(PFERR_PK_BIT)
#define PFERR_SGX_MASK BIT(PFERR_SGX_BIT)
#define PFERR_GUEST_FINAL_MASK BIT_ULL(PFERR_GUEST_FINAL_BIT)
#define PFERR_GUEST_PAGE_MASK BIT_ULL(PFERR_GUEST_PAGE_BIT)
#define PFERR_IMPLICIT_ACCESS BIT_ULL(PFERR_IMPLICIT_ACCESS_BIT)

bool sys_clocksource_is_based_on_tsc(void);

#endif /* SELFTEST_KVM_PROCESSOR_H */