/* SPDX-License-Identifier: GPL-2.0 */
#include <linux/linkage.h>
#include <asm/asm.h>
#include <asm/bitsperlong.h>
#include <asm/kvm_vcpu_regs.h>
#include <asm/nospec-branch.h>
#include <asm/percpu.h>
#include <asm/segment.h>
#include "kvm-asm-offsets.h"
#include "run_flags.h"

#define WORD_SIZE (BITS_PER_LONG / 8)

#define VCPU_RAX	__VCPU_REGS_RAX * WORD_SIZE
#define VCPU_RCX	__VCPU_REGS_RCX * WORD_SIZE
#define VCPU_RDX	__VCPU_REGS_RDX * WORD_SIZE
#define VCPU_RBX	__VCPU_REGS_RBX * WORD_SIZE
/* Intentionally omit RSP as it's context switched by hardware */
#define VCPU_RBP	__VCPU_REGS_RBP * WORD_SIZE
#define VCPU_RSI	__VCPU_REGS_RSI * WORD_SIZE
#define VCPU_RDI	__VCPU_REGS_RDI * WORD_SIZE

#ifdef CONFIG_X86_64
#define VCPU_R8		__VCPU_REGS_R8  * WORD_SIZE
#define VCPU_R9		__VCPU_REGS_R9  * WORD_SIZE
#define VCPU_R10	__VCPU_REGS_R10 * WORD_SIZE
#define VCPU_R11	__VCPU_REGS_R11 * WORD_SIZE
#define VCPU_R12	__VCPU_REGS_R12 * WORD_SIZE
#define VCPU_R13	__VCPU_REGS_R13 * WORD_SIZE
#define VCPU_R14	__VCPU_REGS_R14 * WORD_SIZE
#define VCPU_R15	__VCPU_REGS_R15 * WORD_SIZE
#endif
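/*
 * For example, on x86-64 WORD_SIZE is 64 / 8 = 8 and __VCPU_REGS_RCX is 1
 * (see asm/kvm_vcpu_regs.h), so VCPU_RCX resolves to byte offset 8 into the
 * @regs array passed to __vmx_vcpu_run().
 */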

.macro VMX_DO_EVENT_IRQOFF call_insn call_target
	/*
	 * Unconditionally create a stack frame.  Getting the correct RSP on
	 * the stack (for x86-64) would take two instructions anyway, and RBP
	 * can be used to restore RSP to make objtool happy (see below).
	 */
	push %_ASM_BP
	mov %_ASM_SP, %_ASM_BP

#ifdef CONFIG_X86_64
	/*
	 * Align RSP to a 16-byte boundary (to emulate CPU behavior) before
	 * creating the synthetic interrupt stack frame for the IRQ/NMI.
	 */
	and  $-16, %rsp
	push $__KERNEL_DS
	push %rbp
#endif
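	/*
	 * Complete the synthetic interrupt frame: RFLAGS and CS are pushed
	 * here, and the CALL below supplies the return RIP.  The handler
	 * returns via IRET, consuming the frame like a real IRQ/NMI frame
	 * (including SS:RSP on x86-64).
	 */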
	pushf
	push $__KERNEL_CS
	\call_insn \call_target

	/*
	 * "Restore" RSP from RBP, even though IRET has already unwound RSP to
	 * the correct value.  objtool doesn't know the callee will IRET and,
	 * without the explicit restore, thinks the stack is getting walloped.
	 * Using an unwind hint is problematic due to x86-64's dynamic alignment.
	 */
	leave
	RET
.endm

.section .noinstr.text, "ax"

/**
 * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
 * @vmx:	struct vcpu_vmx *
 * @regs:	unsigned long * (to guest registers)
 * @flags:	VMX_RUN_VMRESUME:	use VMRESUME instead of VMLAUNCH
 *		VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
 *		VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO: vCPU can access host MMIO
 *
 * Returns:
 *	0 on VM-Exit, 1 on VM-Fail
 */
SYM_FUNC_START(__vmx_vcpu_run)
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
	push %r15
	push %r14
	push %r13
	push %r12
#else
	push %edi
	push %esi
#endif
	push %_ASM_BX

	/* Save @vmx for SPEC_CTRL handling */
	push %_ASM_ARG1

	/* Save @flags (used for VMLAUNCH vs. VMRESUME and mitigations). */
	push %_ASM_ARG3

	/*
	 * Save @regs; _ASM_ARG2 may be modified by vmx_update_host_rsp() and
	 * @regs is needed after VM-Exit to save the guest's register values.
	 */
	push %_ASM_ARG2
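
	/*
	 * The stack now holds, from the top: @regs at (%_ASM_SP), @flags at
	 * WORD_SIZE(%_ASM_SP), and @vmx at 2*WORD_SIZE(%_ASM_SP).  The
	 * SPEC_CTRL, VERW, and VMRESUME checks below rely on these offsets.
	 */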

	lea (%_ASM_SP), %_ASM_ARG2
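	/*
	 * @vmx is still in _ASM_ARG1 (the push above didn't clobber it); pass
	 * the current RSP so vmx_update_host_rsp() can refresh the VMCS
	 * HOST_RSP field if the stack has moved since the last VM-Enter.
	 */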
	call vmx_update_host_rsp

	ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL

	/*
	 * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
	 * host's, write the MSR.
	 *
	 * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
	 * there must not be any returns or indirect branches between this code
	 * and vmentry.
	 */
	mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI
#ifdef CONFIG_X86_64
	mov VMX_spec_ctrl(%rdi), %rdx
	cmp PER_CPU_VAR(x86_spec_ctrl_current), %rdx
	je .Lspec_ctrl_done
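	/* Split the 64-bit guest value into EDX:EAX for the WRMSR below. */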
	movl %edx, %eax
	shr $32, %rdx
#else
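	/*
	 * SPEC_CTRL is a 64-bit MSR even on 32-bit kernels: XOR each 32-bit
	 * half of the guest value against the host's and OR the results, so
	 * ZF is set iff all 64 bits match.  EAX and EDX are left holding the
	 * guest's low and high halves for the WRMSR below.
	 */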
	mov VMX_spec_ctrl(%edi), %eax
	mov PER_CPU_VAR(x86_spec_ctrl_current), %ecx
	xor %eax, %ecx
	mov VMX_spec_ctrl + 4(%edi), %edx
	mov PER_CPU_VAR(x86_spec_ctrl_current + 4), %edi
	xor %edx, %edi
	or %edi, %ecx
	je .Lspec_ctrl_done
#endif
	mov $MSR_IA32_SPEC_CTRL, %ecx
	wrmsr

.Lspec_ctrl_done:

	/*
	 * Since vmentry is serializing on affected CPUs, there's no need for
	 * an LFENCE to stop speculation from skipping the wrmsr.
	 */

	/* Load @regs to RAX. */
	mov (%_ASM_SP), %_ASM_AX

	/* Load guest registers.  Don't clobber flags. */
	mov VCPU_RCX(%_ASM_AX), %_ASM_CX
	mov VCPU_RDX(%_ASM_AX), %_ASM_DX
	mov VCPU_RBX(%_ASM_AX), %_ASM_BX
	mov VCPU_RBP(%_ASM_AX), %_ASM_BP
	mov VCPU_RSI(%_ASM_AX), %_ASM_SI
	mov VCPU_RDI(%_ASM_AX), %_ASM_DI
#ifdef CONFIG_X86_64
	mov VCPU_R8 (%_ASM_AX),  %r8
	mov VCPU_R9 (%_ASM_AX),  %r9
	mov VCPU_R10(%_ASM_AX), %r10
	mov VCPU_R11(%_ASM_AX), %r11
	mov VCPU_R12(%_ASM_AX), %r12
	mov VCPU_R13(%_ASM_AX), %r13
	mov VCPU_R14(%_ASM_AX), %r14
	mov VCPU_R15(%_ASM_AX), %r15
#endif
	/* Load guest RAX.  This kills the @regs pointer! */
	mov VCPU_RAX(%_ASM_AX), %_ASM_AX

	/*
	 * Note, ALTERNATIVE_2 works in reverse order.  If CLEAR_CPU_BUF_VM is
	 * enabled, do VERW unconditionally.  If CPU_BUF_VM_MMIO is enabled,
	 * check @flags to see if the vCPU can access host MMIO, and if so,
	 * do VERW.  Else, do nothing (no mitigations needed/enabled).
	 */
	ALTERNATIVE_2 "",								  \
		      __stringify(testl $VMX_RUN_CLEAR_CPU_BUFFERS_FOR_MMIO, WORD_SIZE(%_ASM_SP); \
				  jz .Lskip_mmio_verw;					  \
				  VERW;							  \
				  .Lskip_mmio_verw:),					  \
		      X86_FEATURE_CLEAR_CPU_BUF_VM_MMIO,				  \
		      __stringify(VERW), X86_FEATURE_CLEAR_CPU_BUF_VM

	/* Check @flags to see if VMLAUNCH or VMRESUME is needed. */
	testl $VMX_RUN_VMRESUME, WORD_SIZE(%_ASM_SP)
	jz .Lvmlaunch

	/*
	 * After a successful VMRESUME/VMLAUNCH, control flow "magically"
	 * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
	 * So this isn't a typical function and objtool needs to be told to
	 * save the unwind state here and restore it below.
	 */
	UNWIND_HINT_SAVE

/*
 * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
 * the 'vmx_vmexit' label below.
 */
.Lvmresume:
	vmresume
	jmp .Lvmfail

.Lvmlaunch:
	vmlaunch
	jmp .Lvmfail

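	/*
	 * If VMRESUME/VMLAUNCH itself faults, e.g. because VMX was disabled
	 * by an emergency reboot, the exception table entries below redirect
	 * execution to .Lfixup, which checks kvm_rebooting.
	 */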
	_ASM_EXTABLE(.Lvmresume, .Lfixup)
	_ASM_EXTABLE(.Lvmlaunch, .Lfixup)

SYM_INNER_LABEL_ALIGN(vmx_vmexit, SYM_L_GLOBAL)

	/* Restore unwind state from before the VMRESUME/VMLAUNCH. */
	UNWIND_HINT_RESTORE
	ENDBR

	/* Temporarily save guest's RAX. */
	push %_ASM_AX

	/* Reload @regs to RAX. */
	mov WORD_SIZE(%_ASM_SP), %_ASM_AX

	/* Save all guest registers, including RAX from the stack */
	pop           VCPU_RAX(%_ASM_AX)
	mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
	mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
	mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
	mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
	mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
	mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
	mov %r8,  VCPU_R8 (%_ASM_AX)
	mov %r9,  VCPU_R9 (%_ASM_AX)
	mov %r10, VCPU_R10(%_ASM_AX)
	mov %r11, VCPU_R11(%_ASM_AX)
	mov %r12, VCPU_R12(%_ASM_AX)
	mov %r13, VCPU_R13(%_ASM_AX)
	mov %r14, VCPU_R14(%_ASM_AX)
	mov %r15, VCPU_R15(%_ASM_AX)
#endif

	/* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
	xor %ebx, %ebx

.Lclear_regs:
	/* Discard @regs.  The register is irrelevant, it just can't be RBX. */
	pop %_ASM_AX

	/*
	 * Clear all general purpose registers except RSP and RBX to prevent
	 * speculative use of the guest's values, even those that are reloaded
	 * via the stack.  In theory, an L1 cache miss when restoring registers
	 * could lead to speculative execution with the guest's values.
	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
	 * free.  RSP and RBX are exempt as RSP is restored by hardware during
	 * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
	 * value.
	 */
	xor %eax, %eax
	xor %ecx, %ecx
	xor %edx, %edx
	xor %ebp, %ebp
	xor %esi, %esi
	xor %edi, %edi
#ifdef CONFIG_X86_64
	xor %r8d,  %r8d
	xor %r9d,  %r9d
	xor %r10d, %r10d
	xor %r11d, %r11d
	xor %r12d, %r12d
	xor %r13d, %r13d
	xor %r14d, %r14d
	xor %r15d, %r15d
#endif

	/*
	 * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
	 * the first unbalanced RET after vmexit!
	 *
	 * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
	 * entries and (in some cases) RSB underflow.
	 *
	 * eIBRS has its own protection against poisoned RSB, so it doesn't
	 * need the RSB filling sequence.  But it does need to be enabled, and a
	 * single call to retire, before the first unbalanced RET.
	 */

	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
			   X86_FEATURE_RSB_VMEXIT_LITE

	pop %_ASM_ARG2	/* @flags */
	pop %_ASM_ARG1	/* @vmx */

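	/*
	 * vmx_spec_ctrl_restore_host() saves the guest's SPEC_CTRL if
	 * VMX_RUN_SAVE_SPEC_CTRL is set in @flags, then restores the host's
	 * value.  Note, this is a direct CALL, made before the first
	 * unbalanced RET, per the RSB comment above.
	 */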
	call vmx_spec_ctrl_restore_host

	CLEAR_BRANCH_HISTORY_VMEXIT

	/* Put return value in AX */
	mov %_ASM_BX, %_ASM_AX

	pop %_ASM_BX
#ifdef CONFIG_X86_64
	pop %r12
	pop %r13
	pop %r14
	pop %r15
#else
	pop %esi
	pop %edi
#endif
	pop %_ASM_BP
	RET

.Lfixup:
	cmpb $0, _ASM_RIP(kvm_rebooting)
	jne .Lvmfail
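	/* A fault with kvm_rebooting clear is a fatal KVM bug; OOPS via ud2. */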
	ud2
.Lvmfail:
	/* VM-Fail: set return value to 1 */
	mov $1, %_ASM_BX
	jmp .Lclear_regs

SYM_FUNC_END(__vmx_vcpu_run)

SYM_FUNC_START(vmx_do_nmi_irqoff)
	VMX_DO_EVENT_IRQOFF call asm_exc_nmi_kvm_vmx
SYM_FUNC_END(vmx_do_nmi_irqoff)

#ifndef CONFIG_CC_HAS_ASM_GOTO_OUTPUT

/**
 * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
 * @field:	VMCS field encoding that failed
 * @fault:	%true if the VMREAD faulted, %false if it failed
 *
 * Save and restore volatile registers across a call to vmread_error().  Note,
 * all parameters are passed on the stack.
 */
SYM_FUNC_START(vmread_error_trampoline)
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP

	push %_ASM_AX
	push %_ASM_CX
	push %_ASM_DX
#ifdef CONFIG_X86_64
	push %rdi
	push %rsi
	push %r8
	push %r9
	push %r10
	push %r11
#endif

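	/*
	 * With the frame set up above, the return RIP is at
	 * WORD_SIZE(%_ASM_BP), so the stack-passed @field and @fault live at
	 * 2*WORD_SIZE and 3*WORD_SIZE respectively.
	 */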
	/* Load @field and @fault to arg1 and arg2 respectively. */
	mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
	mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1

	call vmread_error_trampoline2

	/* Zero out @fault, which will be popped into the result register. */
	_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)

#ifdef CONFIG_X86_64
	pop %r11
	pop %r10
	pop %r9
	pop %r8
	pop %rsi
	pop %rdi
#endif
	pop %_ASM_DX
	pop %_ASM_CX
	pop %_ASM_AX
	pop %_ASM_BP

	RET
SYM_FUNC_END(vmread_error_trampoline)
#endif

.section .text, "ax"

#ifndef CONFIG_X86_FRED

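/*
 * The external interrupt handler's address is passed in via @ARG1 (it comes
 * from the host IDT), so use a retpoline-safe CALL_NOSPEC rather than the
 * direct CALL used for NMIs above.
 */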
SYM_FUNC_START(vmx_do_interrupt_irqoff)
	VMX_DO_EVENT_IRQOFF CALL_NOSPEC _ASM_ARG1
SYM_FUNC_END(vmx_do_interrupt_irqoff)

#endif