// SPDX-License-Identifier: GPL-2.0-only
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/kvm_types.h>
#include <linux/list.h>
#include <linux/percpu.h>

#include <asm/perf_event.h>
#include <asm/processor.h>
#include <asm/virt.h>
#include <asm/vmx.h>

/*
 * Vendor-neutral hooks for enabling and disabling hardware virtualization on
 * a CPU, filled in with the VMX or SVM implementation at boot.
 */
struct x86_virt_ops {
	int feature;
	int (*enable_virtualization_cpu)(void);
	int (*disable_virtualization_cpu)(void);
	void (*emergency_disable_virtualization_cpu)(void);
};
static struct x86_virt_ops virt_ops __ro_after_init;

/*
 * Set when virtualization is being disabled for reboot/crash, in which case
 * KVM and the kernel eat faults on VMX/SVM instructions instead of WARNing.
 */
__visible bool virt_rebooting;
EXPORT_SYMBOL_FOR_KVM(virt_rebooting);

static DEFINE_PER_CPU(int, virtualization_nr_users);

/*
 * KVM's emergency callback, invoked during an emergency reboot/crash to clean
 * up, e.g. VMCLEAR in-use VMCSes, before virtualization is disabled.
 */
static cpu_emergency_virt_cb __rcu *kvm_emergency_callback;

void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback)
{
	if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback)))
		return;

	rcu_assign_pointer(kvm_emergency_callback, callback);
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_register_emergency_callback);

void x86_virt_unregister_emergency_callback(cpu_emergency_virt_cb *callback)
{
	if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback) != callback))
		return;

	rcu_assign_pointer(kvm_emergency_callback, NULL);
	synchronize_rcu();
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_unregister_emergency_callback);

static void x86_virt_invoke_kvm_emergency_callback(void)
{
	cpu_emergency_virt_cb *kvm_callback;

	/* rcu_read_lock() is NMI-safe, and this can be called from NMI. */
	rcu_read_lock();
	kvm_callback = rcu_dereference(kvm_emergency_callback);
	if (kvm_callback)
		kvm_callback();
	rcu_read_unlock();
}

#if IS_ENABLED(CONFIG_KVM_INTEL)
static DEFINE_PER_CPU(struct vmcs *, root_vmcs);

static int x86_virt_cpu_vmxon(void)
{
	u64 vmxon_pointer = __pa(per_cpu(root_vmcs, raw_smp_processor_id()));
	u64 msr;

	cr4_set_bits(X86_CR4_VMXE);

	asm goto("1: vmxon %[vmxon_pointer]\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 : : [vmxon_pointer] "m" (vmxon_pointer)
		 : : fault);
	return 0;

fault:
	WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
		  rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
	cr4_clear_bits(X86_CR4_VMXE);

	return -EFAULT;
}

static int x86_vmx_enable_virtualization_cpu(void)
{
	int r;

	if (cr4_read_shadow() & X86_CR4_VMXE)
		return -EBUSY;

	intel_pt_handle_vmx(1);

	r = x86_virt_cpu_vmxon();
	if (r) {
		intel_pt_handle_vmx(0);
		return r;
	}

	return 0;
}

/*
 * Disable VMX and clear CR4.VMXE (even if VMXOFF faults).
 *
 * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
 * atomically track post-VMXON state, e.g. this may be called in NMI context.
 * Eat all faults, as all other faults on VMXOFF are mode related, i.e. faults
 * are guaranteed to be due to the !post-VMXON check unless the CPU is
 * magically in RM, VM86, compat mode, or at CPL>0.
 */
static int x86_vmx_disable_virtualization_cpu(void)
{
	int r = -EIO;

	asm goto("1: vmxoff\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 ::: "cc", "memory" : fault);
	r = 0;

fault:
	cr4_clear_bits(X86_CR4_VMXE);
	intel_pt_handle_vmx(0);
	return r;
}

static void x86_vmx_emergency_disable_virtualization_cpu(void)
{
	virt_rebooting = true;

	/*
	 * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
	 * set in task context.
	 * If this races with _another_ emergency call
	 * from NMI context, VMCLEAR (in KVM) and VMXOFF may #UD, but KVM and
	 * the kernel will eat those faults due to virt_rebooting being set by
	 * the interrupting NMI callback.
	 */
	if (!(__read_cr4() & X86_CR4_VMXE))
		return;

	x86_virt_invoke_kvm_emergency_callback();

	x86_vmx_disable_virtualization_cpu();
}

static __init void x86_vmx_exit(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		free_page((unsigned long)per_cpu(root_vmcs, cpu));
		per_cpu(root_vmcs, cpu) = NULL;
	}
}

static __init int __x86_vmx_init(void)
{
	const struct x86_virt_ops vmx_ops = {
		.feature = X86_FEATURE_VMX,
		.enable_virtualization_cpu = x86_vmx_enable_virtualization_cpu,
		.disable_virtualization_cpu = x86_vmx_disable_virtualization_cpu,
		.emergency_disable_virtualization_cpu = x86_vmx_emergency_disable_virtualization_cpu,
	};

	u64 basic_msr;
	u32 rev_id;
	int cpu;

	if (!cpu_feature_enabled(X86_FEATURE_VMX))
		return -EOPNOTSUPP;

	rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);

	/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
	if (WARN_ON_ONCE(vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE))
		return -EIO;

	/*
	 * Even if eVMCS is enabled (or will be enabled), and even though it's
	 * not explicitly documented by the TLFS, the root VMCS passed to VMXON
	 * should still be marked with the revision_id reported by the physical
	 * CPU.
	 */
	rev_id = vmx_basic_vmcs_revision_id(basic_msr);

	for_each_possible_cpu(cpu) {
		int node = cpu_to_node(cpu);
		struct page *page;
		struct vmcs *vmcs;

		page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
		if (WARN_ON_ONCE(!page)) {
			x86_vmx_exit();
			return -ENOMEM;
		}

		vmcs = page_address(page);
		vmcs->hdr.revision_id = rev_id;
		per_cpu(root_vmcs, cpu) = vmcs;
	}

	memcpy(&virt_ops, &vmx_ops, sizeof(virt_ops));
	return 0;
}

static __init int x86_vmx_init(void)
{
	int r;

	r = __x86_vmx_init();
	if (r)
		setup_clear_cpu_cap(X86_FEATURE_VMX);
	return r;
}
#else
static __init int x86_vmx_init(void) { return -EOPNOTSUPP; }
static __init void x86_vmx_exit(void) { }
#endif

#if IS_ENABLED(CONFIG_KVM_AMD)
static int x86_svm_enable_virtualization_cpu(void)
{
	u64 efer;

	rdmsrq(MSR_EFER, efer);
	if (efer & EFER_SVME)
		return -EBUSY;

	wrmsrq(MSR_EFER, efer | EFER_SVME);
	return 0;
}

static int x86_svm_disable_virtualization_cpu(void)
{
	int r = -EIO;
	u64 efer;

	/*
	 * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
	 * NMI aren't blocked.
	 */
	asm goto("1: stgi\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 ::: "memory" : fault);
	r = 0;

fault:
	rdmsrq(MSR_EFER, efer);
	wrmsrq(MSR_EFER, efer & ~EFER_SVME);
	return r;
}

static void x86_svm_emergency_disable_virtualization_cpu(void)
{
	u64 efer;

	virt_rebooting = true;

	rdmsrq(MSR_EFER, efer);
	if (!(efer & EFER_SVME))
		return;

	x86_virt_invoke_kvm_emergency_callback();

	x86_svm_disable_virtualization_cpu();
}

static __init int x86_svm_init(void)
{
	const struct x86_virt_ops svm_ops = {
		.feature = X86_FEATURE_SVM,
		.enable_virtualization_cpu = x86_svm_enable_virtualization_cpu,
		.disable_virtualization_cpu = x86_svm_disable_virtualization_cpu,
		.emergency_disable_virtualization_cpu = x86_svm_emergency_disable_virtualization_cpu,
	};

	if (!cpu_feature_enabled(X86_FEATURE_SVM) ||
	    cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return -EOPNOTSUPP;

	memcpy(&virt_ops, &svm_ops, sizeof(virt_ops));
	return 0;
}
#else
static __init int x86_svm_init(void) { return -EOPNOTSUPP; }
#endif

/*
 * Take a reference on hardware virtualization for this CPU, enabling it in
 * hardware (VMXON, or EFER.SVME=1) on the 0=>1 transition.  The count is
 * per-CPU, so the reference must be put on the same CPU.
 */
int x86_virt_get_ref(int feat)
{
	int r;

	/* Ensure the !feature check can't get false positives. */
	BUILD_BUG_ON(!X86_FEATURE_SVM || !X86_FEATURE_VMX);

	if (!virt_ops.feature || virt_ops.feature != feat)
		return -EOPNOTSUPP;

	guard(preempt)();

	if (this_cpu_inc_return(virtualization_nr_users) > 1)
		return 0;

	r = virt_ops.enable_virtualization_cpu();
	if (r)
		WARN_ON_ONCE(this_cpu_dec_return(virtualization_nr_users));

	return r;
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_get_ref);

/*
 * Put this CPU's reference, disabling virtualization in hardware when the
 * count hits zero.
 */
void x86_virt_put_ref(int feat)
{
	guard(preempt)();

	if (WARN_ON_ONCE(!this_cpu_read(virtualization_nr_users)) ||
	    this_cpu_dec_return(virtualization_nr_users))
		return;

	BUG_ON(virt_ops.disable_virtualization_cpu() && !virt_rebooting);
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_put_ref);

/*
 * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
 * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
 * GIF=0, i.e. if the crash occurred between CLGI and STGI.
 */
int x86_virt_emergency_disable_virtualization_cpu(void)
{
	if (!virt_ops.feature)
		return -EOPNOTSUPP;

	/*
	 * IRQs must be disabled, as virtualization is enabled in hardware via
	 * function call IPI; keeping IRQs disabled guarantees virtualization
	 * stays disabled on this CPU once it's been turned off.
	 */
	lockdep_assert_irqs_disabled();

	/*
	 * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
	 * other CPUs may have virtualization enabled.
	 *
	 * TODO: Track whether or not virtualization might be enabled on other
	 * CPUs?  May not be worth avoiding the NMI shootdown...
	 */
	virt_ops.emergency_disable_virtualization_cpu();
	return 0;
}

void __init x86_virt_init(void)
{
	/*
	 * Attempt to initialize both SVM and VMX, and simply use whichever one
	 * is present.  Refuse to enable/use SVM or VMX if both are somehow
	 * supported.  No known CPU supports both SVM and VMX.
	 */
	bool has_vmx = !x86_vmx_init();
	bool has_svm = !x86_svm_init();

	if (WARN_ON_ONCE(has_vmx && has_svm)) {
		x86_vmx_exit();
		memset(&virt_ops, 0, sizeof(virt_ops));
	}
}
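
/*
 * Usage sketch (illustrative only, not built): a hypervisor module such as
 * KVM would register its emergency callback once at load time, then take and
 * put the per-CPU reference from a function-call IPI so that each reference
 * is acquired and released on the CPU it belongs to.  All example_* names
 * below are hypothetical, not part of this API.
 */
#if 0
static void example_emergency_cb(void)
{
	/* VMCLEAR in-use VMCSes, etc., before the kernel executes VMXOFF. */
}

static atomic_t example_enable_failed;

/* Runs on every CPU via on_each_cpu(); the refcount is strictly per-CPU. */
static void example_hardware_enable(void *ign)
{
	/* VMXON (or EFER.SVME=1) happens only on the 0=>1 transition. */
	if (x86_virt_get_ref(X86_FEATURE_VMX))
		atomic_inc(&example_enable_failed);
}

static void example_hardware_disable(void *ign)
{
	/* Hardware virtualization is disabled on the 1=>0 transition. */
	x86_virt_put_ref(X86_FEATURE_VMX);
}

static int example_module_init(void)
{
	x86_virt_register_emergency_callback(example_emergency_cb);

	atomic_set(&example_enable_failed, 0);
	on_each_cpu(example_hardware_enable, NULL, 1);
	if (atomic_read(&example_enable_failed)) {
		/* Rollback (putting refs on CPUs that succeeded) omitted. */
		x86_virt_unregister_emergency_callback(example_emergency_cb);
		return -EBUSY;
	}

	return 0;
}
#endif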