xref: /linux/arch/x86/virt/hw.c (revision d0ee290071b475410476b4126c72da4bf6a2194c)
1 // SPDX-License-Identifier: GPL-2.0-only
2 #include <linux/cpu.h>
3 #include <linux/cpumask.h>
4 #include <linux/errno.h>
5 #include <linux/kvm_types.h>
6 #include <linux/list.h>
7 #include <linux/percpu.h>
8 
9 #include <asm/perf_event.h>
10 #include <asm/processor.h>
11 #include <asm/virt.h>
12 #include <asm/vmx.h>
13 
14 struct x86_virt_ops {
15 	int feature;
16 	int (*enable_virtualization_cpu)(void);
17 	int (*disable_virtualization_cpu)(void);
18 	void (*emergency_disable_virtualization_cpu)(void);
19 };
20 static struct x86_virt_ops virt_ops __ro_after_init;
21 
22 __visible bool virt_rebooting;
23 EXPORT_SYMBOL_FOR_KVM(virt_rebooting);
24 
25 static DEFINE_PER_CPU(int, virtualization_nr_users);
26 
27 static cpu_emergency_virt_cb __rcu *kvm_emergency_callback;
28 
/*
 * Install @callback as the hook that the emergency disable paths invoke before
 * turning virtualization off.  Only a single callback may be registered at a
 * time: if one is already present, WARN (once) and leave it untouched.
 */
void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback)
{
	if (!WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback)))
		rcu_assign_pointer(kvm_emergency_callback, callback);
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_register_emergency_callback);
37 
/*
 * Remove @callback, which must be the callback that is currently registered;
 * anything else triggers a one-time WARN and is ignored.  After clearing the
 * pointer, wait for an RCU grace period before returning.
 */
void x86_virt_unregister_emergency_callback(cpu_emergency_virt_cb *callback)
{
	cpu_emergency_virt_cb *registered;

	registered = rcu_access_pointer(kvm_emergency_callback);
	if (WARN_ON_ONCE(registered != callback))
		return;

	rcu_assign_pointer(kvm_emergency_callback, NULL);
	synchronize_rcu();
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_unregister_emergency_callback);
47 
x86_virt_invoke_kvm_emergency_callback(void)48 static void x86_virt_invoke_kvm_emergency_callback(void)
49 {
50 	cpu_emergency_virt_cb *kvm_callback;
51 
52 	/*
53 	 * RCU may not be watching the crashing CPU here, so rcu_dereference()
54 	 * triggers a suspicious-RCU-usage splat. In principle, a concurrent
55 	 * KVM module unload could race with this read; see commit 2baa33a8ddd6
56 	 * ("KVM: x86: Leave user-return notifier registered on reboot/shutdown")
57 	 * which notes that nothing prevents module unload during panic/reboot.
58 	 *
59 	 * However, taking a lock here would be riskier than the current race:
60 	 * the system is going down via NMI shootdown, and any lock could be
61 	 * held by an already-stopped CPU. Use rcu_dereference_raw() to silence
62 	 * the lockdep splat and accept the comically small remaining race;
63 	 * panic context inherently cannot guarantee complete correctness.
64 	 */
65 	kvm_callback = rcu_dereference_raw(kvm_emergency_callback);
66 	if (kvm_callback)
67 		kvm_callback();
68 }
69 
70 #if IS_ENABLED(CONFIG_KVM_INTEL)
71 static DEFINE_PER_CPU(struct vmcs *, root_vmcs);
72 
x86_virt_cpu_vmxon(void)73 static int x86_virt_cpu_vmxon(void)
74 {
75 	u64 vmxon_pointer = __pa(per_cpu(root_vmcs, raw_smp_processor_id()));
76 	u64 msr;
77 
78 	cr4_set_bits(X86_CR4_VMXE);
79 
80 	asm goto("1: vmxon %[vmxon_pointer]\n\t"
81 			  _ASM_EXTABLE(1b, %l[fault])
82 			  : : [vmxon_pointer] "m"(vmxon_pointer)
83 			  : : fault);
84 	return 0;
85 
86 fault:
87 	WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
88 		  rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
89 	cr4_clear_bits(X86_CR4_VMXE);
90 
91 	return -EFAULT;
92 }
93 
x86_vmx_enable_virtualization_cpu(void)94 static int x86_vmx_enable_virtualization_cpu(void)
95 {
96 	int r;
97 
98 	if (cr4_read_shadow() & X86_CR4_VMXE)
99 		return -EBUSY;
100 
101 	intel_pt_handle_vmx(1);
102 
103 	r = x86_virt_cpu_vmxon();
104 	if (r) {
105 		intel_pt_handle_vmx(0);
106 		return r;
107 	}
108 
109 	return 0;
110 }
111 
112 /*
113  * Disable VMX and clear CR4.VMXE (even if VMXOFF faults)
114  *
115  * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
116  * atomically track post-VMXON state, e.g. this may be called in NMI context.
117  * Eat all faults as all other faults on VMXOFF faults are mode related, i.e.
118  * faults are guaranteed to be due to the !post-VMXON check unless the CPU is
119  * magically in RM, VM86, compat mode, or at CPL>0.
120  */
x86_vmx_disable_virtualization_cpu(void)121 static int x86_vmx_disable_virtualization_cpu(void)
122 {
123 	int r = -EIO;
124 
125 	asm goto("1: vmxoff\n\t"
126 		 _ASM_EXTABLE(1b, %l[fault])
127 		 ::: "cc", "memory" : fault);
128 	r = 0;
129 
130 fault:
131 	cr4_clear_bits(X86_CR4_VMXE);
132 	intel_pt_handle_vmx(0);
133 	return r;
134 }
135 
x86_vmx_emergency_disable_virtualization_cpu(void)136 static void x86_vmx_emergency_disable_virtualization_cpu(void)
137 {
138 	virt_rebooting = true;
139 
140 	/*
141 	 * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
142 	 * set in task context.  If this races with _another_ emergency call
143 	 * from NMI context, VMCLEAR (in KVM) and VMXOFF may #UD, but KVM and
144 	 * the kernel will eat those faults due to virt_rebooting being set by
145 	 * the interrupting NMI callback.
146 	 */
147 	if (!(__read_cr4() & X86_CR4_VMXE))
148 		return;
149 
150 	x86_virt_invoke_kvm_emergency_callback();
151 
152 	x86_vmx_disable_virtualization_cpu();
153 }
154 
x86_vmx_exit(void)155 static __init void x86_vmx_exit(void)
156 {
157 	int cpu;
158 
159 	for_each_possible_cpu(cpu) {
160 		free_page((unsigned long)per_cpu(root_vmcs, cpu));
161 		per_cpu(root_vmcs, cpu) = NULL;
162 	}
163 }
164 
__x86_vmx_init(void)165 static __init int __x86_vmx_init(void)
166 {
167 	const struct x86_virt_ops vmx_ops = {
168 		.feature = X86_FEATURE_VMX,
169 		.enable_virtualization_cpu = x86_vmx_enable_virtualization_cpu,
170 		.disable_virtualization_cpu = x86_vmx_disable_virtualization_cpu,
171 		.emergency_disable_virtualization_cpu = x86_vmx_emergency_disable_virtualization_cpu,
172 	};
173 
174 	u64 basic_msr;
175 	u32 rev_id;
176 	int cpu;
177 
178 	if (!cpu_feature_enabled(X86_FEATURE_VMX))
179 		return -EOPNOTSUPP;
180 
181 	rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);
182 
183 	/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
184 	if (WARN_ON_ONCE(vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE))
185 		return -EIO;
186 
187 	/*
188 	 * Even if eVMCS is enabled (or will be enabled?), and even though not
189 	 * explicitly documented by TLFS, the root VMCS  passed to VMXON should
190 	 * still be marked with the revision_id reported by the physical CPU.
191 	 */
192 	rev_id = vmx_basic_vmcs_revision_id(basic_msr);
193 
194 	for_each_possible_cpu(cpu) {
195 		int node = cpu_to_node(cpu);
196 		struct page *page;
197 		struct vmcs *vmcs;
198 
199 		page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
200 		if (WARN_ON_ONCE(!page)) {
201 			x86_vmx_exit();
202 			return -ENOMEM;
203 		}
204 
205 		vmcs = page_address(page);
206 		vmcs->hdr.revision_id = rev_id;
207 		per_cpu(root_vmcs, cpu) = vmcs;
208 	}
209 
210 	memcpy(&virt_ops, &vmx_ops, sizeof(virt_ops));
211 	return 0;
212 }
213 
x86_vmx_init(void)214 static __init int x86_vmx_init(void)
215 {
216 	int r;
217 
218 	r = __x86_vmx_init();
219 	if (r)
220 		setup_clear_cpu_cap(X86_FEATURE_VMX);
221 	return r;
222 }
223 #else
x86_vmx_init(void)224 static __init int x86_vmx_init(void) { return -EOPNOTSUPP; }
x86_vmx_exit(void)225 static __init void x86_vmx_exit(void) { }
226 #endif
227 
228 #if IS_ENABLED(CONFIG_KVM_AMD)
x86_svm_enable_virtualization_cpu(void)229 static int x86_svm_enable_virtualization_cpu(void)
230 {
231 	u64 efer;
232 
233 	rdmsrq(MSR_EFER, efer);
234 	if (efer & EFER_SVME)
235 		return -EBUSY;
236 
237 	wrmsrq(MSR_EFER, efer | EFER_SVME);
238 	return 0;
239 }
240 
x86_svm_disable_virtualization_cpu(void)241 static int x86_svm_disable_virtualization_cpu(void)
242 {
243 	int r = -EIO;
244 	u64 efer;
245 
246 	/*
247 	 * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
248 	 * NMI aren't blocked.
249 	 */
250 	asm goto("1: stgi\n\t"
251 		 _ASM_EXTABLE(1b, %l[fault])
252 		 ::: "memory" : fault);
253 	r = 0;
254 
255 fault:
256 	rdmsrq(MSR_EFER, efer);
257 	wrmsrq(MSR_EFER, efer & ~EFER_SVME);
258 	return r;
259 }
260 
x86_svm_emergency_disable_virtualization_cpu(void)261 static void x86_svm_emergency_disable_virtualization_cpu(void)
262 {
263 	u64 efer;
264 
265 	virt_rebooting = true;
266 
267 	rdmsrq(MSR_EFER, efer);
268 	if (!(efer & EFER_SVME))
269 		return;
270 
271 	x86_virt_invoke_kvm_emergency_callback();
272 
273 	x86_svm_disable_virtualization_cpu();
274 }
275 
x86_svm_init(void)276 static __init int x86_svm_init(void)
277 {
278 	const struct x86_virt_ops svm_ops = {
279 		.feature = X86_FEATURE_SVM,
280 		.enable_virtualization_cpu = x86_svm_enable_virtualization_cpu,
281 		.disable_virtualization_cpu = x86_svm_disable_virtualization_cpu,
282 		.emergency_disable_virtualization_cpu = x86_svm_emergency_disable_virtualization_cpu,
283 	};
284 
285 	if (!cpu_feature_enabled(X86_FEATURE_SVM) ||
286 	    cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
287 		return -EOPNOTSUPP;
288 
289 	memcpy(&virt_ops, &svm_ops, sizeof(virt_ops));
290 	return 0;
291 }
292 #else
x86_svm_init(void)293 static __init int x86_svm_init(void) { return -EOPNOTSUPP; }
294 #endif
295 
x86_virt_get_ref(int feat)296 int x86_virt_get_ref(int feat)
297 {
298 	int r;
299 
300 	/* Ensure the !feature check can't get false positives. */
301 	BUILD_BUG_ON(!X86_FEATURE_SVM || !X86_FEATURE_VMX);
302 
303 	if (!virt_ops.feature || virt_ops.feature != feat)
304 		return -EOPNOTSUPP;
305 
306 	guard(preempt)();
307 
308 	if (this_cpu_inc_return(virtualization_nr_users) > 1)
309 		return 0;
310 
311 	r = virt_ops.enable_virtualization_cpu();
312 	if (r)
313 		WARN_ON_ONCE(this_cpu_dec_return(virtualization_nr_users));
314 
315 	return r;
316 }
317 EXPORT_SYMBOL_FOR_KVM(x86_virt_get_ref);
318 
x86_virt_put_ref(int feat)319 void x86_virt_put_ref(int feat)
320 {
321 	guard(preempt)();
322 
323 	if (WARN_ON_ONCE(!this_cpu_read(virtualization_nr_users)) ||
324 	    this_cpu_dec_return(virtualization_nr_users))
325 		return;
326 
327 	BUG_ON(virt_ops.disable_virtualization_cpu() && !virt_rebooting);
328 }
329 EXPORT_SYMBOL_FOR_KVM(x86_virt_put_ref);
330 
331 /*
332  * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
333  * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
334  * GIF=0, i.e. if the crash occurred between CLGI and STGI.
335  */
x86_virt_emergency_disable_virtualization_cpu(void)336 int x86_virt_emergency_disable_virtualization_cpu(void)
337 {
338 	if (!virt_ops.feature)
339 		return -EOPNOTSUPP;
340 
341 	/*
342 	 * IRQs must be disabled as virtualization is enabled in hardware via
343 	 * function call IPIs, i.e. IRQs need to be disabled to guarantee
344 	 * virtualization stays disabled.
345 	 */
346 	lockdep_assert_irqs_disabled();
347 
348 	/*
349 	 * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
350 	 * other CPUs may have virtualization enabled.
351 	 *
352 	 * TODO: Track whether or not virtualization might be enabled on other
353 	 *	 CPUs?  May not be worth avoiding the NMI shootdown...
354 	 */
355 	virt_ops.emergency_disable_virtualization_cpu();
356 	return 0;
357 }
358 
x86_virt_init(void)359 void __init x86_virt_init(void)
360 {
361 	/*
362 	 * Attempt to initialize both SVM and VMX, and simply use whichever one
363 	 * is present.  Rsefuse to enable/use SVM or VMX if both are somehow
364 	 * supported.  No known CPU supports both SVM and VMX.
365 	 */
366 	bool has_vmx = !x86_vmx_init();
367 	bool has_svm = !x86_svm_init();
368 
369 	if (WARN_ON_ONCE(has_vmx && has_svm)) {
370 		x86_vmx_exit();
371 		memset(&virt_ops, 0, sizeof(virt_ops));
372 	}
373 }
374