1 /* SPDX-License-Identifier: GPL-2.0 */ 2 #ifndef ARCH_X86_KVM_REGS_H 3 #define ARCH_X86_KVM_REGS_H 4 5 #include <linux/kvm_host.h> 6 7 #define KVM_POSSIBLE_CR0_GUEST_BITS (X86_CR0_TS | X86_CR0_WP) 8 #define KVM_POSSIBLE_CR4_GUEST_BITS \ 9 (X86_CR4_PVI | X86_CR4_DE | X86_CR4_PCE | X86_CR4_OSFXSR \ 10 | X86_CR4_OSXMMEXCPT | X86_CR4_PGE | X86_CR4_TSD | X86_CR4_FSGSBASE \ 11 | X86_CR4_CET) 12 13 #define X86_CR0_PDPTR_BITS (X86_CR0_CD | X86_CR0_NW | X86_CR0_PG) 14 #define X86_CR4_TLBFLUSH_BITS (X86_CR4_PGE | X86_CR4_PCIDE | X86_CR4_PAE | X86_CR4_SMEP) 15 #define X86_CR4_PDPTR_BITS (X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE | X86_CR4_SMEP) 16 17 static_assert(!(KVM_POSSIBLE_CR0_GUEST_BITS & X86_CR0_PDPTR_BITS)); 18 19 static inline bool is_long_mode(struct kvm_vcpu *vcpu) 20 { 21 #ifdef CONFIG_X86_64 22 return !!(vcpu->arch.efer & EFER_LMA); 23 #else 24 return false; 25 #endif 26 } 27 28 static inline bool is_64_bit_mode(struct kvm_vcpu *vcpu) 29 { 30 int cs_db, cs_l; 31 32 WARN_ON_ONCE(vcpu->arch.guest_state_protected); 33 34 if (!is_long_mode(vcpu)) 35 return false; 36 kvm_x86_call(get_cs_db_l_bits)(vcpu, &cs_db, &cs_l); 37 return cs_l; 38 } 39 40 static inline bool is_64_bit_hypercall(struct kvm_vcpu *vcpu) 41 { 42 #ifdef CONFIG_X86_64 43 /* 44 * If running with protected guest state, the CS register is not 45 * accessible. The hypercall register values will have had to been 46 * provided in 64-bit mode, so assume the guest is in 64-bit. 47 */ 48 return vcpu->arch.guest_state_protected || is_64_bit_mode(vcpu); 49 #else 50 return false; 51 #endif 52 } 53 54 static __always_inline unsigned long kvm_reg_mode_mask(struct kvm_vcpu *vcpu) 55 { 56 #ifdef CONFIG_X86_64 57 return is_64_bit_mode(vcpu) ? GENMASK(63, 0) : GENMASK(31, 0); 58 #else 59 return GENMASK(31, 0); 60 #endif 61 } 62 63 #define __BUILD_KVM_GPR_ACCESSORS(lname, uname) \ 64 static __always_inline unsigned long kvm_##lname##_read(struct kvm_vcpu *vcpu) \ 65 { \ 66 return vcpu->arch.regs[VCPU_REGS_##uname] & kvm_reg_mode_mask(vcpu); \ 67 } \ 68 static __always_inline unsigned long kvm_##lname##_read_raw(struct kvm_vcpu *vcpu) \ 69 { \ 70 return vcpu->arch.regs[VCPU_REGS_##uname]; \ 71 } \ 72 static __always_inline void kvm_##lname##_write_raw(struct kvm_vcpu *vcpu, \ 73 unsigned long val) \ 74 { \ 75 vcpu->arch.regs[VCPU_REGS_##uname] = val; \ 76 } 77 #define BUILD_KVM_GPR_ACCESSORS(lname, uname) \ 78 static __always_inline u32 kvm_e##lname##_read(struct kvm_vcpu *vcpu) \ 79 { \ 80 return vcpu->arch.regs[VCPU_REGS_##uname]; \ 81 } \ 82 static __always_inline void kvm_e##lname##_write(struct kvm_vcpu *vcpu, u32 val) \ 83 { \ 84 vcpu->arch.regs[VCPU_REGS_##uname] = val; \ 85 } \ 86 __BUILD_KVM_GPR_ACCESSORS(r##lname, uname) 87 88 BUILD_KVM_GPR_ACCESSORS(ax, RAX) 89 BUILD_KVM_GPR_ACCESSORS(bx, RBX) 90 BUILD_KVM_GPR_ACCESSORS(cx, RCX) 91 BUILD_KVM_GPR_ACCESSORS(dx, RDX) 92 BUILD_KVM_GPR_ACCESSORS(bp, RBP) 93 BUILD_KVM_GPR_ACCESSORS(si, RSI) 94 BUILD_KVM_GPR_ACCESSORS(di, RDI) 95 #ifdef CONFIG_X86_64 96 __BUILD_KVM_GPR_ACCESSORS(r8, R8) 97 __BUILD_KVM_GPR_ACCESSORS(r9, R9) 98 __BUILD_KVM_GPR_ACCESSORS(r10, R10) 99 __BUILD_KVM_GPR_ACCESSORS(r11, R11) 100 __BUILD_KVM_GPR_ACCESSORS(r12, R12) 101 __BUILD_KVM_GPR_ACCESSORS(r13, R13) 102 __BUILD_KVM_GPR_ACCESSORS(r14, R14) 103 __BUILD_KVM_GPR_ACCESSORS(r15, R15) 104 #endif 105 106 /* 107 * Using the register cache from interrupt context is generally not allowed, as 108 * caching a register and marking it available/dirty can't be done atomically, 109 * i.e. accesses from interrupt context may clobber state or read stale data if 110 * the vCPU task is in the process of updating the cache. The exception is if 111 * KVM is handling a PMI IRQ/NMI VM-Exit, as that bound code sequence doesn't 112 * touch the cache, it runs after the cache is reset (post VM-Exit), and PMIs 113 * need to access several registers that are cacheable. 114 */ 115 #define kvm_assert_register_caching_allowed(vcpu) \ 116 lockdep_assert_once(in_task() || kvm_arch_pmi_in_guest(vcpu)) 117 118 /* 119 * avail dirty 120 * 0 0 register in VMCS/VMCB 121 * 0 1 *INVALID* 122 * 1 0 register in vcpu->arch 123 * 1 1 register in vcpu->arch, needs to be stored back 124 */ 125 static inline bool kvm_register_is_available(struct kvm_vcpu *vcpu, 126 enum kvm_reg reg) 127 { 128 kvm_assert_register_caching_allowed(vcpu); 129 return test_bit(reg, vcpu->arch.regs_avail); 130 } 131 132 static inline bool kvm_register_is_dirty(struct kvm_vcpu *vcpu, 133 enum kvm_reg reg) 134 { 135 kvm_assert_register_caching_allowed(vcpu); 136 return test_bit(reg, vcpu->arch.regs_dirty); 137 } 138 139 static inline void kvm_register_mark_for_reload(struct kvm_vcpu *vcpu, 140 enum kvm_reg reg) 141 { 142 kvm_assert_register_caching_allowed(vcpu); 143 __clear_bit(reg, vcpu->arch.regs_avail); 144 __clear_bit(reg, vcpu->arch.regs_dirty); 145 } 146 147 static inline void kvm_register_mark_available(struct kvm_vcpu *vcpu, 148 enum kvm_reg reg) 149 { 150 kvm_assert_register_caching_allowed(vcpu); 151 __set_bit(reg, vcpu->arch.regs_avail); 152 } 153 154 static inline void kvm_register_mark_dirty(struct kvm_vcpu *vcpu, 155 enum kvm_reg reg) 156 { 157 kvm_assert_register_caching_allowed(vcpu); 158 __set_bit(reg, vcpu->arch.regs_avail); 159 __set_bit(reg, vcpu->arch.regs_dirty); 160 } 161 162 /* 163 * kvm_register_test_and_mark_available() is a special snowflake that uses an 164 * arch bitop directly to avoid the explicit instrumentation that comes with 165 * the generic bitops. This allows code that cannot be instrumented (noinstr 166 * functions), e.g. the low level VM-Enter/VM-Exit paths, to cache registers. 167 */ 168 static __always_inline bool kvm_register_test_and_mark_available(struct kvm_vcpu *vcpu, 169 enum kvm_reg reg) 170 { 171 kvm_assert_register_caching_allowed(vcpu); 172 return arch___test_and_set_bit(reg, vcpu->arch.regs_avail); 173 } 174 175 static __always_inline void kvm_clear_available_registers(struct kvm_vcpu *vcpu, 176 unsigned long clear_mask) 177 { 178 BUILD_BUG_ON(sizeof(clear_mask) != sizeof(vcpu->arch.regs_avail[0])); 179 BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.regs_avail) != 1); 180 181 /* 182 * Note the bitwise-AND! In practice, a straight write would also work 183 * as KVM initializes the mask to all ones and never clears registers 184 * that are eagerly synchronized. Using a bitwise-AND adds a bit of 185 * sanity checking as incorrectly marking an eagerly sync'd register 186 * unavailable will generate a WARN due to an unexpected cache request. 187 */ 188 vcpu->arch.regs_avail[0] &= ~clear_mask; 189 } 190 191 static __always_inline void kvm_reset_dirty_registers(struct kvm_vcpu *vcpu) 192 { 193 BUILD_BUG_ON(ARRAY_SIZE(vcpu->arch.regs_dirty) != 1); 194 vcpu->arch.regs_dirty[0] = 0; 195 } 196 197 /* 198 * The "raw" register helpers are only for cases where the full 64 bits of a 199 * register are read/written irrespective of current vCPU mode. In other words, 200 * odds are good you shouldn't be using the raw variants. 201 */ 202 static inline unsigned long kvm_register_read_raw(struct kvm_vcpu *vcpu, int reg) 203 { 204 if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_GENERAL_PURPOSE_REGS)) 205 return 0; 206 207 if (!kvm_register_is_available(vcpu, reg)) 208 kvm_x86_call(cache_reg)(vcpu, reg); 209 210 return vcpu->arch.regs[reg]; 211 } 212 213 static inline unsigned long kvm_register_read(struct kvm_vcpu *vcpu, int reg) 214 { 215 return kvm_register_read_raw(vcpu, reg) & kvm_reg_mode_mask(vcpu); 216 } 217 218 static inline void kvm_register_write_raw(struct kvm_vcpu *vcpu, int reg, 219 unsigned long val) 220 { 221 if (WARN_ON_ONCE((unsigned int)reg >= NR_VCPU_GENERAL_PURPOSE_REGS)) 222 return; 223 224 vcpu->arch.regs[reg] = val; 225 kvm_register_mark_dirty(vcpu, reg); 226 } 227 228 static inline void kvm_register_write(struct kvm_vcpu *vcpu, 229 int reg, unsigned long val) 230 { 231 return kvm_register_write_raw(vcpu, reg, val & kvm_reg_mode_mask(vcpu)); 232 } 233 234 static inline unsigned long kvm_rip_read(struct kvm_vcpu *vcpu) 235 { 236 if (!kvm_register_is_available(vcpu, VCPU_REG_RIP)) 237 kvm_x86_call(cache_reg)(vcpu, VCPU_REG_RIP); 238 239 return vcpu->arch.rip; 240 } 241 242 static inline void kvm_rip_write(struct kvm_vcpu *vcpu, unsigned long val) 243 { 244 vcpu->arch.rip = val; 245 kvm_register_mark_dirty(vcpu, VCPU_REG_RIP); 246 } 247 248 static inline unsigned long kvm_rsp_read(struct kvm_vcpu *vcpu) 249 { 250 return kvm_register_read_raw(vcpu, VCPU_REGS_RSP); 251 } 252 253 static inline void kvm_rsp_write(struct kvm_vcpu *vcpu, unsigned long val) 254 { 255 kvm_register_write_raw(vcpu, VCPU_REGS_RSP, val); 256 } 257 258 static inline u64 kvm_read_edx_eax(struct kvm_vcpu *vcpu) 259 { 260 return kvm_eax_read(vcpu) | (u64)(kvm_edx_read(vcpu)) << 32; 261 } 262 263 static inline u64 kvm_pdptr_read(struct kvm_vcpu *vcpu, int index) 264 { 265 might_sleep(); /* on svm */ 266 267 if (!kvm_register_is_available(vcpu, VCPU_REG_PDPTR)) 268 kvm_x86_call(cache_reg)(vcpu, VCPU_REG_PDPTR); 269 270 return vcpu->arch.pdptrs[index]; 271 } 272 273 static inline void kvm_pdptr_write(struct kvm_vcpu *vcpu, int index, u64 value) 274 { 275 vcpu->arch.pdptrs[index] = value; 276 } 277 278 static inline ulong kvm_read_cr0_bits(struct kvm_vcpu *vcpu, ulong mask) 279 { 280 ulong tmask = mask & KVM_POSSIBLE_CR0_GUEST_BITS; 281 if ((tmask & vcpu->arch.cr0_guest_owned_bits) && 282 !kvm_register_is_available(vcpu, VCPU_REG_CR0)) 283 kvm_x86_call(cache_reg)(vcpu, VCPU_REG_CR0); 284 return vcpu->arch.cr0 & mask; 285 } 286 287 static __always_inline bool kvm_is_cr0_bit_set(struct kvm_vcpu *vcpu, 288 unsigned long cr0_bit) 289 { 290 BUILD_BUG_ON(!is_power_of_2(cr0_bit)); 291 292 return !!kvm_read_cr0_bits(vcpu, cr0_bit); 293 } 294 295 static inline ulong kvm_read_cr0(struct kvm_vcpu *vcpu) 296 { 297 return kvm_read_cr0_bits(vcpu, ~0UL); 298 } 299 300 static inline ulong kvm_read_cr4_bits(struct kvm_vcpu *vcpu, ulong mask) 301 { 302 ulong tmask = mask & KVM_POSSIBLE_CR4_GUEST_BITS; 303 if ((tmask & vcpu->arch.cr4_guest_owned_bits) && 304 !kvm_register_is_available(vcpu, VCPU_REG_CR4)) 305 kvm_x86_call(cache_reg)(vcpu, VCPU_REG_CR4); 306 return vcpu->arch.cr4 & mask; 307 } 308 309 static __always_inline bool kvm_is_cr4_bit_set(struct kvm_vcpu *vcpu, 310 unsigned long cr4_bit) 311 { 312 BUILD_BUG_ON(!is_power_of_2(cr4_bit)); 313 314 return !!kvm_read_cr4_bits(vcpu, cr4_bit); 315 } 316 317 static inline ulong kvm_read_cr3(struct kvm_vcpu *vcpu) 318 { 319 if (!kvm_register_is_available(vcpu, VCPU_REG_CR3)) 320 kvm_x86_call(cache_reg)(vcpu, VCPU_REG_CR3); 321 return vcpu->arch.cr3; 322 } 323 324 static inline ulong kvm_read_cr4(struct kvm_vcpu *vcpu) 325 { 326 return kvm_read_cr4_bits(vcpu, ~0UL); 327 } 328 329 static inline bool __kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4) 330 { 331 return !(cr4 & vcpu->arch.cr4_guest_rsvd_bits); 332 } 333 334 #define __cr4_reserved_bits(__cpu_has, __c) \ 335 ({ \ 336 u64 __reserved_bits = CR4_RESERVED_BITS; \ 337 \ 338 if (!__cpu_has(__c, X86_FEATURE_XSAVE)) \ 339 __reserved_bits |= X86_CR4_OSXSAVE; \ 340 if (!__cpu_has(__c, X86_FEATURE_SMEP)) \ 341 __reserved_bits |= X86_CR4_SMEP; \ 342 if (!__cpu_has(__c, X86_FEATURE_SMAP)) \ 343 __reserved_bits |= X86_CR4_SMAP; \ 344 if (!__cpu_has(__c, X86_FEATURE_FSGSBASE)) \ 345 __reserved_bits |= X86_CR4_FSGSBASE; \ 346 if (!__cpu_has(__c, X86_FEATURE_PKU)) \ 347 __reserved_bits |= X86_CR4_PKE; \ 348 if (!__cpu_has(__c, X86_FEATURE_LA57)) \ 349 __reserved_bits |= X86_CR4_LA57; \ 350 if (!__cpu_has(__c, X86_FEATURE_UMIP)) \ 351 __reserved_bits |= X86_CR4_UMIP; \ 352 if (!__cpu_has(__c, X86_FEATURE_VMX)) \ 353 __reserved_bits |= X86_CR4_VMXE; \ 354 if (!__cpu_has(__c, X86_FEATURE_PCID)) \ 355 __reserved_bits |= X86_CR4_PCIDE; \ 356 if (!__cpu_has(__c, X86_FEATURE_LAM)) \ 357 __reserved_bits |= X86_CR4_LAM_SUP; \ 358 if (!__cpu_has(__c, X86_FEATURE_SHSTK) && \ 359 !__cpu_has(__c, X86_FEATURE_IBT)) \ 360 __reserved_bits |= X86_CR4_CET; \ 361 __reserved_bits; \ 362 }) 363 364 static inline bool is_protmode(struct kvm_vcpu *vcpu) 365 { 366 return kvm_is_cr0_bit_set(vcpu, X86_CR0_PE); 367 } 368 369 static inline bool is_pae(struct kvm_vcpu *vcpu) 370 { 371 return kvm_is_cr4_bit_set(vcpu, X86_CR4_PAE); 372 } 373 374 static inline bool is_pse(struct kvm_vcpu *vcpu) 375 { 376 return kvm_is_cr4_bit_set(vcpu, X86_CR4_PSE); 377 } 378 379 static inline bool is_paging(struct kvm_vcpu *vcpu) 380 { 381 return likely(kvm_is_cr0_bit_set(vcpu, X86_CR0_PG)); 382 } 383 384 static inline bool is_pae_paging(struct kvm_vcpu *vcpu) 385 { 386 return !is_long_mode(vcpu) && is_pae(vcpu) && is_paging(vcpu); 387 } 388 389 static inline bool kvm_dr7_valid(u64 data) 390 { 391 /* Bits [63:32] are reserved */ 392 return !(data >> 32); 393 } 394 static inline bool kvm_dr6_valid(u64 data) 395 { 396 /* Bits [63:32] are reserved */ 397 return !(data >> 32); 398 } 399 400 static inline void enter_guest_mode(struct kvm_vcpu *vcpu) 401 { 402 vcpu->arch.hflags |= HF_GUEST_MASK; 403 vcpu->stat.guest_mode = 1; 404 } 405 406 static inline void leave_guest_mode(struct kvm_vcpu *vcpu) 407 { 408 vcpu->arch.hflags &= ~HF_GUEST_MASK; 409 410 if (vcpu->arch.load_eoi_exitmap_pending) { 411 vcpu->arch.load_eoi_exitmap_pending = false; 412 kvm_make_request(KVM_REQ_LOAD_EOI_EXITMAP, vcpu); 413 } 414 415 vcpu->stat.guest_mode = 0; 416 } 417 418 static inline bool is_guest_mode(struct kvm_vcpu *vcpu) 419 { 420 return vcpu->arch.hflags & HF_GUEST_MASK; 421 } 422 423 #endif 424