// SPDX-License-Identifier: GPL-2.0-only
#include <linux/cpu.h>
#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/kvm_types.h>
#include <linux/list.h>
#include <linux/percpu.h>

#include <asm/perf_event.h>
#include <asm/processor.h>
#include <asm/virt.h>
#include <asm/vmx.h>

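/*
 * Vendor-specific hooks for enabling/disabling virtualization on a CPU.
 * Exactly one set of ops (VMX or SVM) is installed at boot by x86_virt_init().
 */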
struct x86_virt_ops {
	int feature;
	int (*enable_virtualization_cpu)(void);
	int (*disable_virtualization_cpu)(void);
	void (*emergency_disable_virtualization_cpu)(void);
};
static struct x86_virt_ops virt_ops __ro_after_init;

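/*
 * Set when virtualization is being torn down for reboot/crash; once true,
 * faults on VMX/SVM instructions are tolerated instead of treated as bugs.
 */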
__visible bool virt_rebooting;
EXPORT_SYMBOL_FOR_KVM(virt_rebooting);

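/* Per-CPU count of users that currently need virtualization enabled. */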
static DEFINE_PER_CPU(int, virtualization_nr_users);

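/* KVM's callback for emergency virtualization disabling, protected by RCU. */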
static cpu_emergency_virt_cb __rcu *kvm_emergency_callback;

void x86_virt_register_emergency_callback(cpu_emergency_virt_cb *callback)
{
	if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback)))
		return;

	rcu_assign_pointer(kvm_emergency_callback, callback);
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_register_emergency_callback);

void x86_virt_unregister_emergency_callback(cpu_emergency_virt_cb *callback)
{
	if (WARN_ON_ONCE(rcu_access_pointer(kvm_emergency_callback) != callback))
		return;

	rcu_assign_pointer(kvm_emergency_callback, NULL);
	synchronize_rcu();
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_unregister_emergency_callback);

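/* Invoke KVM's emergency disable callback, if one is registered. */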
static void x86_virt_invoke_kvm_emergency_callback(void)
{
	cpu_emergency_virt_cb *kvm_callback;

	kvm_callback = rcu_dereference(kvm_emergency_callback);
	if (kvm_callback)
		kvm_callback();
}

#if IS_ENABLED(CONFIG_KVM_INTEL)
static DEFINE_PER_CPU(struct vmcs *, root_vmcs);

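/* Enter VMX root operation (VMXON) using this CPU's root VMCS region. */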
static int x86_virt_cpu_vmxon(void)
{
	u64 vmxon_pointer = __pa(per_cpu(root_vmcs, raw_smp_processor_id()));
	u64 msr;

	cr4_set_bits(X86_CR4_VMXE);

	asm goto("1: vmxon %[vmxon_pointer]\n\t"
			  _ASM_EXTABLE(1b, %l[fault])
			  : : [vmxon_pointer] "m"(vmxon_pointer)
			  : : fault);
	return 0;

fault:
	WARN_ONCE(1, "VMXON faulted, MSR_IA32_FEAT_CTL (0x3a) = 0x%llx\n",
		  rdmsrq_safe(MSR_IA32_FEAT_CTL, &msr) ? 0xdeadbeef : msr);
	cr4_clear_bits(X86_CR4_VMXE);

	return -EFAULT;
}

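/*
 * Enable VMX on this CPU.  Fails with -EBUSY if CR4.VMXE is already set, i.e.
 * if something else has already enabled (or is enabling) VMX.
 */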
static int x86_vmx_enable_virtualization_cpu(void)
{
	int r;

	if (cr4_read_shadow() & X86_CR4_VMXE)
		return -EBUSY;

	intel_pt_handle_vmx(1);

	r = x86_virt_cpu_vmxon();
	if (r) {
		intel_pt_handle_vmx(0);
		return r;
	}

	return 0;
}

/*
 * Disable VMX and clear CR4.VMXE (even if VMXOFF faults).
 *
 * Note, VMXOFF causes a #UD if the CPU is !post-VMXON, but it's impossible to
 * atomically track post-VMXON state, e.g. this may be called in NMI context.
 * Eat all faults, as all other VMXOFF faults are mode related, i.e. faults
 * are guaranteed to be due to the !post-VMXON check unless the CPU is
 * magically in RM, VM86, compat mode, or at CPL>0.
 */
static int x86_vmx_disable_virtualization_cpu(void)
{
	int r = -EIO;

	asm goto("1: vmxoff\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 ::: "cc", "memory" : fault);
	r = 0;

fault:
	cr4_clear_bits(X86_CR4_VMXE);
	intel_pt_handle_vmx(0);
	return r;
}

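/*
 * Emergency (e.g. reboot/crash) path to disable VMX; may be called from NMI
 * context, see the CR4.VMXE comment below.
 */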
static void x86_vmx_emergency_disable_virtualization_cpu(void)
{
	virt_rebooting = true;

	/*
	 * Note, CR4.VMXE can be _cleared_ in NMI context, but it can only be
	 * set in task context.  If this races with _another_ emergency call
	 * from NMI context, VMCLEAR (in KVM) and VMXOFF may #UD, but KVM and
	 * the kernel will eat those faults due to virt_rebooting being set by
	 * the interrupting NMI callback.
	 */
	if (!(__read_cr4() & X86_CR4_VMXE))
		return;

	x86_virt_invoke_kvm_emergency_callback();

	x86_vmx_disable_virtualization_cpu();
}

static __init void x86_vmx_exit(void)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		free_page((unsigned long)per_cpu(root_vmcs, cpu));
		per_cpu(root_vmcs, cpu) = NULL;
	}
}

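/*
 * Allocate a root VMCS for each possible CPU and install the VMX virt_ops.
 */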
static __init int __x86_vmx_init(void)
{
	const struct x86_virt_ops vmx_ops = {
		.feature = X86_FEATURE_VMX,
		.enable_virtualization_cpu = x86_vmx_enable_virtualization_cpu,
		.disable_virtualization_cpu = x86_vmx_disable_virtualization_cpu,
		.emergency_disable_virtualization_cpu = x86_vmx_emergency_disable_virtualization_cpu,
	};

	u64 basic_msr;
	u32 rev_id;
	int cpu;

	if (!cpu_feature_enabled(X86_FEATURE_VMX))
		return -EOPNOTSUPP;

	rdmsrq(MSR_IA32_VMX_BASIC, basic_msr);

	/* IA-32 SDM Vol 3B: VMCS size is never greater than 4kB. */
	if (WARN_ON_ONCE(vmx_basic_vmcs_size(basic_msr) > PAGE_SIZE))
		return -EIO;

	/*
	 * Even if eVMCS is enabled (or will be enabled), and even though it's
	 * not explicitly documented by the TLFS, the root VMCS passed to
	 * VMXON should still be marked with the revision_id reported by the
	 * physical CPU.
	 */
	rev_id = vmx_basic_vmcs_revision_id(basic_msr);

	for_each_possible_cpu(cpu) {
		int node = cpu_to_node(cpu);
		struct page *page;
		struct vmcs *vmcs;

		page = __alloc_pages_node(node, GFP_KERNEL | __GFP_ZERO, 0);
		if (WARN_ON_ONCE(!page)) {
			x86_vmx_exit();
			return -ENOMEM;
		}

		vmcs = page_address(page);
		vmcs->hdr.revision_id = rev_id;
		per_cpu(root_vmcs, cpu) = vmcs;
	}

	memcpy(&virt_ops, &vmx_ops, sizeof(virt_ops));
	return 0;
}

static __init int x86_vmx_init(void)
{
	int r;

	r = __x86_vmx_init();
	if (r)
		setup_clear_cpu_cap(X86_FEATURE_VMX);
	return r;
}
#else
static __init int x86_vmx_init(void) { return -EOPNOTSUPP; }
static __init void x86_vmx_exit(void) { }
#endif

#if IS_ENABLED(CONFIG_KVM_AMD)
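/* Enable SVM by setting EFER.SVME; fails with -EBUSY if it's already set. */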
static int x86_svm_enable_virtualization_cpu(void)
{
	u64 efer;

	rdmsrq(MSR_EFER, efer);
	if (efer & EFER_SVME)
		return -EBUSY;

	wrmsrq(MSR_EFER, efer | EFER_SVME);
	return 0;
}

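/* Disable SVM, i.e. clear EFER.SVME, after forcing GIF=1 via STGI. */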
static int x86_svm_disable_virtualization_cpu(void)
{
	int r = -EIO;
	u64 efer;

	/*
	 * Force GIF=1 prior to disabling SVM, e.g. to ensure INIT and
	 * NMI aren't blocked.
	 */
	asm goto("1: stgi\n\t"
		 _ASM_EXTABLE(1b, %l[fault])
		 ::: "memory" : fault);
	r = 0;

fault:
	rdmsrq(MSR_EFER, efer);
	wrmsrq(MSR_EFER, efer & ~EFER_SVME);
	return r;
}

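/*
 * Emergency (e.g. reboot/crash) path to disable SVM, mirroring the VMX flavor.
 */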
static void x86_svm_emergency_disable_virtualization_cpu(void)
{
	u64 efer;

	virt_rebooting = true;

	rdmsrq(MSR_EFER, efer);
	if (!(efer & EFER_SVME))
		return;

	x86_virt_invoke_kvm_emergency_callback();

	x86_svm_disable_virtualization_cpu();
}

static __init int x86_svm_init(void)
{
	const struct x86_virt_ops svm_ops = {
		.feature = X86_FEATURE_SVM,
		.enable_virtualization_cpu = x86_svm_enable_virtualization_cpu,
		.disable_virtualization_cpu = x86_svm_disable_virtualization_cpu,
		.emergency_disable_virtualization_cpu = x86_svm_emergency_disable_virtualization_cpu,
	};

	if (!cpu_feature_enabled(X86_FEATURE_SVM) ||
	    cc_platform_has(CC_ATTR_GUEST_MEM_ENCRYPT))
		return -EOPNOTSUPP;

	memcpy(&virt_ops, &svm_ops, sizeof(virt_ops));
	return 0;
}
#else
static __init int x86_svm_init(void) { return -EOPNOTSUPP; }
#endif

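/*
 * Take a reference on virtualization support for @feat (X86_FEATURE_VMX or
 * X86_FEATURE_SVM) on the current CPU, enabling it in hardware on 0=>1.
 */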
int x86_virt_get_ref(int feat)
{
	int r;

	/* Ensure the !feature check can't get false positives. */
	BUILD_BUG_ON(!X86_FEATURE_SVM || !X86_FEATURE_VMX);

	if (!virt_ops.feature || virt_ops.feature != feat)
		return -EOPNOTSUPP;

	guard(preempt)();

	if (this_cpu_inc_return(virtualization_nr_users) > 1)
		return 0;

	r = virt_ops.enable_virtualization_cpu();
	if (r)
		WARN_ON_ONCE(this_cpu_dec_return(virtualization_nr_users));

	return r;
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_get_ref);

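/*
 * Drop a reference on virtualization support on the current CPU, disabling it
 * in hardware on the final put.
 */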
void x86_virt_put_ref(int feat)
{
	guard(preempt)();

	if (WARN_ON_ONCE(!this_cpu_read(virtualization_nr_users)) ||
	    this_cpu_dec_return(virtualization_nr_users))
		return;

	BUG_ON(virt_ops.disable_virtualization_cpu() && !virt_rebooting);
}
EXPORT_SYMBOL_FOR_KVM(x86_virt_put_ref);
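
/*
 * Illustrative usage sketch (not taken from an in-tree caller): a hypervisor
 * module takes a reference on every online CPU before executing VMX/SVM
 * instructions, and drops the references when done, e.g.:
 *
 *	static void my_enable_cpu(void *failed)
 *	{
 *		if (x86_virt_get_ref(X86_FEATURE_VMX))
 *			*(bool *)failed = true;
 *	}
 *	...
 *	on_each_cpu(my_enable_cpu, &failed, 1);
 */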

/*
 * Disable virtualization, i.e. VMX or SVM, to ensure INIT is recognized during
 * reboot.  VMX blocks INIT if the CPU is post-VMXON, and SVM blocks INIT if
 * GIF=0, i.e. if the crash occurred between CLGI and STGI.
 */
int x86_virt_emergency_disable_virtualization_cpu(void)
{
	if (!virt_ops.feature)
		return -EOPNOTSUPP;

	/*
	 * IRQs must be disabled as virtualization is enabled in hardware via
	 * function call IPIs, i.e. IRQs need to be disabled to guarantee
	 * virtualization stays disabled.
	 */
	lockdep_assert_irqs_disabled();

	/*
	 * Do the NMI shootdown even if virtualization is off on _this_ CPU, as
	 * other CPUs may have virtualization enabled.
	 *
	 * TODO: Track whether or not virtualization might be enabled on other
	 *	 CPUs?  May not be worth avoiding the NMI shootdown...
	 */
	virt_ops.emergency_disable_virtualization_cpu();
	return 0;
}

void __init x86_virt_init(void)
{
	/*
	 * Attempt to initialize both SVM and VMX, and simply use whichever one
	 * is present.  Refuse to enable/use SVM or VMX if both are somehow
	 * supported.  No known CPU supports both SVM and VMX.
	 */
	bool has_vmx = !x86_vmx_init();
	bool has_svm = !x86_svm_init();

	if (WARN_ON_ONCE(has_vmx && has_svm)) {
		x86_vmx_exit();
		memset(&virt_ops, 0, sizeof(virt_ops));
	}
}