xref: /linux/arch/x86/kvm/vmx/vmenter.S (revision 67feaba413ec68daf4124e9870878899b4ed9a0e)
1/* SPDX-License-Identifier: GPL-2.0 */
2#include <linux/linkage.h>
3#include <asm/asm.h>
4#include <asm/asm-offsets.h>
5#include <asm/bitsperlong.h>
6#include <asm/kvm_vcpu_regs.h>
7#include <asm/nospec-branch.h>
8#include <asm/percpu.h>
9#include <asm/segment.h>
10#include "run_flags.h"
11
/* Number of bytes in one native word (BITS_PER_LONG is in bits). */
#define WORD_SIZE (BITS_PER_LONG / 8)

/*
 * Byte offset of a guest register inside the @regs array: the register's
 * __VCPU_REGS_<reg> index scaled by the native word size.  The expansion is
 * fully parenthesised so that it is safe inside any larger expression.
 */
#define VCPU_REG_OFFSET(reg)	(__VCPU_REGS_##reg * WORD_SIZE)

#define VCPU_RAX	VCPU_REG_OFFSET(RAX)
#define VCPU_RCX	VCPU_REG_OFFSET(RCX)
#define VCPU_RDX	VCPU_REG_OFFSET(RDX)
#define VCPU_RBX	VCPU_REG_OFFSET(RBX)
/* No VCPU_RSP on purpose: RSP is context switched by hardware. */
#define VCPU_RBP	VCPU_REG_OFFSET(RBP)
#define VCPU_RSI	VCPU_REG_OFFSET(RSI)
#define VCPU_RDI	VCPU_REG_OFFSET(RDI)

#ifdef CONFIG_X86_64
/* R8-R15 only exist in 64-bit mode. */
#define VCPU_R8		VCPU_REG_OFFSET(R8)
#define VCPU_R9		VCPU_REG_OFFSET(R9)
#define VCPU_R10	VCPU_REG_OFFSET(R10)
#define VCPU_R11	VCPU_REG_OFFSET(R11)
#define VCPU_R12	VCPU_REG_OFFSET(R12)
#define VCPU_R13	VCPU_REG_OFFSET(R13)
#define VCPU_R14	VCPU_REG_OFFSET(R14)
#define VCPU_R15	VCPU_REG_OFFSET(R15)
#endif
33
34.section .noinstr.text, "ax"
35
/**
 * __vmx_vcpu_run - Run a vCPU via a transition to VMX guest mode
 * @vmx:	struct vcpu_vmx *
 * @regs:	unsigned long * (to guest registers)
 * @flags:	VMX_RUN_VMRESUME:	use VMRESUME instead of VMLAUNCH
 *		VMX_RUN_SAVE_SPEC_CTRL: save guest SPEC_CTRL into vmx->spec_ctrl
 *
 * Stack layout from the end of the prologue until the guest registers have
 * been saved again after VM-Exit (lowest address first):
 *
 *	0*WORD_SIZE(%_ASM_SP)	@regs
 *	1*WORD_SIZE(%_ASM_SP)	@flags
 *	2*WORD_SIZE(%_ASM_SP)	@vmx
 *	above			the registers pushed in the prologue
 *
 * The address of the @regs slot is the value handed to vmx_update_host_rsp(),
 * so the same layout is visible again at 'vmx_vmexit'.
 *
 * Returns:
 *	0 on VM-Exit, 1 on VM-Fail
 */
SYM_FUNC_START(__vmx_vcpu_run)
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP
#ifdef CONFIG_X86_64
	push %r15
	push %r14
	push %r13
	push %r12
#else
	push %edi
	push %esi
#endif
	push %_ASM_BX

	/* Save @vmx for SPEC_CTRL handling */
	push %_ASM_ARG1

	/* Save @flags for SPEC_CTRL handling */
	push %_ASM_ARG3

	/*
	 * Save @regs, _ASM_ARG2 may be modified by vmx_update_host_rsp() and
	 * @regs is needed after VM-Exit to save the guest's register values.
	 */
	push %_ASM_ARG2

	/* Copy @flags to BL, _ASM_ARG3 is volatile. */
	mov %_ASM_ARG3B, %bl

	/*
	 * Pass the current stack pointer as the second argument; _ASM_ARG1
	 * has not been modified since entry and still holds @vmx.
	 */
	lea (%_ASM_SP), %_ASM_ARG2
	call vmx_update_host_rsp

	ALTERNATIVE "jmp .Lspec_ctrl_done", "", X86_FEATURE_MSR_SPEC_CTRL

	/*
	 * SPEC_CTRL handling: if the guest's SPEC_CTRL value differs from the
	 * host's, write the MSR.
	 *
	 * IMPORTANT: To avoid RSB underflow attacks and any other nastiness,
	 * there must not be any returns or indirect branches between this code
	 * and vmentry.
	 */
	mov 2*WORD_SIZE(%_ASM_SP), %_ASM_DI		/* DI  = @vmx */
	movl VMX_spec_ctrl(%_ASM_DI), %edi		/* EDI = guest value */
	movl PER_CPU_VAR(x86_spec_ctrl_current), %esi	/* ESI = host value */
	cmp %edi, %esi
	je .Lspec_ctrl_done
	/* WRMSR takes the MSR index in ECX and the value in EDX:EAX. */
	mov $MSR_IA32_SPEC_CTRL, %ecx
	xor %edx, %edx
	mov %edi, %eax
	wrmsr

.Lspec_ctrl_done:

	/*
	 * Since vmentry is serializing on affected CPUs, there's no need for
	 * an LFENCE to stop speculation from skipping the wrmsr.
	 */

	/* Load @regs to RAX. */
	mov (%_ASM_SP), %_ASM_AX

	/* Check if vmlaunch or vmresume is needed */
	testb $VMX_RUN_VMRESUME, %bl

	/* Load guest registers.  Don't clobber flags. */
	mov VCPU_RCX(%_ASM_AX), %_ASM_CX
	mov VCPU_RDX(%_ASM_AX), %_ASM_DX
	mov VCPU_RBX(%_ASM_AX), %_ASM_BX
	mov VCPU_RBP(%_ASM_AX), %_ASM_BP
	mov VCPU_RSI(%_ASM_AX), %_ASM_SI
	mov VCPU_RDI(%_ASM_AX), %_ASM_DI
#ifdef CONFIG_X86_64
	mov VCPU_R8 (%_ASM_AX),  %r8
	mov VCPU_R9 (%_ASM_AX),  %r9
	mov VCPU_R10(%_ASM_AX), %r10
	mov VCPU_R11(%_ASM_AX), %r11
	mov VCPU_R12(%_ASM_AX), %r12
	mov VCPU_R13(%_ASM_AX), %r13
	mov VCPU_R14(%_ASM_AX), %r14
	mov VCPU_R15(%_ASM_AX), %r15
#endif
	/* Load guest RAX.  This kills the @regs pointer! */
	mov VCPU_RAX(%_ASM_AX), %_ASM_AX

	/* Check EFLAGS.ZF from 'testb' above */
	jz .Lvmlaunch

	/*
	 * After a successful VMRESUME/VMLAUNCH, control flow "magically"
	 * resumes below at 'vmx_vmexit' due to the VMCS HOST_RIP setting.
	 * So this isn't a typical function and objtool needs to be told to
	 * save the unwind state here and restore it below.
	 */
	UNWIND_HINT_SAVE

/*
 * If VMRESUME/VMLAUNCH and corresponding vmexit succeed, execution resumes at
 * the 'vmx_vmexit' label below.  Falling through to the JMP therefore means
 * the instruction did not enter the guest, i.e. VM-Fail.
 */
.Lvmresume:
	vmresume
	jmp .Lvmfail

.Lvmlaunch:
	vmlaunch
	jmp .Lvmfail

	/* A fault on either VM-Enter instruction is redirected to .Lfixup. */
	_ASM_EXTABLE(.Lvmresume, .Lfixup)
	_ASM_EXTABLE(.Lvmlaunch, .Lfixup)

SYM_INNER_LABEL(vmx_vmexit, SYM_L_GLOBAL)

	/* Restore unwind state from before the VMRESUME/VMLAUNCH. */
	UNWIND_HINT_RESTORE
	ENDBR

	/* Temporarily save guest's RAX. */
	push %_ASM_AX

	/* Reload @regs to RAX, it sits one slot above the value just pushed. */
	mov WORD_SIZE(%_ASM_SP), %_ASM_AX

	/* Save all guest registers, including RAX from the stack */
	pop           VCPU_RAX(%_ASM_AX)
	mov %_ASM_CX, VCPU_RCX(%_ASM_AX)
	mov %_ASM_DX, VCPU_RDX(%_ASM_AX)
	mov %_ASM_BX, VCPU_RBX(%_ASM_AX)
	mov %_ASM_BP, VCPU_RBP(%_ASM_AX)
	mov %_ASM_SI, VCPU_RSI(%_ASM_AX)
	mov %_ASM_DI, VCPU_RDI(%_ASM_AX)
#ifdef CONFIG_X86_64
	mov %r8,  VCPU_R8 (%_ASM_AX)
	mov %r9,  VCPU_R9 (%_ASM_AX)
	mov %r10, VCPU_R10(%_ASM_AX)
	mov %r11, VCPU_R11(%_ASM_AX)
	mov %r12, VCPU_R12(%_ASM_AX)
	mov %r13, VCPU_R13(%_ASM_AX)
	mov %r14, VCPU_R14(%_ASM_AX)
	mov %r15, VCPU_R15(%_ASM_AX)
#endif

	/* Clear return value to indicate VM-Exit (as opposed to VM-Fail). */
	xor %ebx, %ebx

.Lclear_regs:
	/*
	 * Clear all general purpose registers except RSP and RBX to prevent
	 * speculative use of the guest's values, even those that are reloaded
	 * via the stack.  In theory, an L1 cache miss when restoring registers
	 * could lead to speculative execution with the guest's values.
	 * Zeroing XORs are dirt cheap, i.e. the extra paranoia is essentially
	 * free.  RSP and RBX are exempt as RSP is restored by hardware during
	 * VM-Exit and RBX is explicitly loaded with 0 or 1 to hold the return
	 * value.
	 */
	xor %eax, %eax
	xor %ecx, %ecx
	xor %edx, %edx
	xor %ebp, %ebp
	xor %esi, %esi
	xor %edi, %edi
#ifdef CONFIG_X86_64
	xor %r8d,  %r8d
	xor %r9d,  %r9d
	xor %r10d, %r10d
	xor %r11d, %r11d
	xor %r12d, %r12d
	xor %r13d, %r13d
	xor %r14d, %r14d
	xor %r15d, %r15d
#endif

	/* "POP" @regs. */
	add $WORD_SIZE, %_ASM_SP

	/*
	 * IMPORTANT: RSB filling and SPEC_CTRL handling must be done before
	 * the first unbalanced RET after vmexit!
	 *
	 * For retpoline or IBRS, RSB filling is needed to prevent poisoned RSB
	 * entries and (in some cases) RSB underflow.
	 *
	 * eIBRS has its own protection against poisoned RSB, so it doesn't
	 * need the RSB filling sequence.  But it does need to be enabled, and a
	 * single call to retire, before the first unbalanced RET.
	 */

	FILL_RETURN_BUFFER %_ASM_CX, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_VMEXIT,\
			   X86_FEATURE_RSB_VMEXIT_LITE


	pop %_ASM_ARG2	/* @flags */
	pop %_ASM_ARG1	/* @vmx */

	call vmx_spec_ctrl_restore_host

	/* Put return value in AX */
	mov %_ASM_BX, %_ASM_AX

	/* Undo the prologue pushes in reverse order. */
	pop %_ASM_BX
#ifdef CONFIG_X86_64
	pop %r12
	pop %r13
	pop %r14
	pop %r15
#else
	pop %esi
	pop %edi
#endif
	pop %_ASM_BP
	RET

	/*
	 * Exception fixup for a faulting VMRESUME/VMLAUNCH.  The fault is only
	 * tolerated while kvm_rebooting is non-zero, in which case it is
	 * reported as VM-Fail; otherwise execute UD2.  _ASM_RIP() makes the
	 * access RIP-relative on 64-bit and leaves it a plain symbol reference
	 * on 32-bit.
	 */
.Lfixup:
	cmpb $0, _ASM_RIP(kvm_rebooting)
	jne .Lvmfail
	ud2
.Lvmfail:
	/* VM-Fail: set return value to 1 */
	mov $1, %_ASM_BX
	jmp .Lclear_regs

SYM_FUNC_END(__vmx_vcpu_run)
269
270
271.section .text, "ax"
272
/**
 * vmread_error_trampoline - Trampoline from inline asm to vmread_error()
 * @field:	VMCS field encoding that failed
 * @fault:	%true if the VMREAD faulted, %false if it failed
 *
 * Preserve the volatile registers around a call to vmread_error().  Both
 * parameters are passed on the stack: once the frame pointer is set up,
 * @field lives at 2*WORD_SIZE and @fault at 3*WORD_SIZE above it.  The
 * @fault slot is overwritten with zero before returning.
 */
SYM_FUNC_START(vmread_error_trampoline)
	push %_ASM_BP
	mov  %_ASM_SP, %_ASM_BP

	/* Stash everything the callee is allowed to clobber. */
	push %_ASM_AX
	push %_ASM_CX
	push %_ASM_DX
#ifdef CONFIG_X86_64
	push %rdi
	push %rsi
	push %r8
	push %r9
	push %r10
	push %r11

	/* 64-bit: hand @field and @fault over in arg1 and arg2. */
	mov 2*WORD_SIZE(%_ASM_BP), %_ASM_ARG1
	mov 3*WORD_SIZE(%_ASM_BP), %_ASM_ARG2
#else
	/* 32-bit: parameters go on the stack (see asmlinkage), last one first. */
	push 3*WORD_SIZE(%_ASM_BP)
	push 2*WORD_SIZE(%_ASM_BP)
#endif

	call vmread_error

#ifndef CONFIG_X86_64
	/* Drop the two stack parameters pushed for the call. */
	add $2*WORD_SIZE, %_ASM_SP
#endif

	/* Zero out @fault, which will be popped into the result register. */
	_ASM_MOV $0, 3*WORD_SIZE(%_ASM_BP)

	/* Restore the stashed registers, mirroring the push order above. */
#ifdef CONFIG_X86_64
	pop %r11
	pop %r10
	pop %r9
	pop %r8
	pop %rsi
	pop %rdi
#endif
	pop %_ASM_DX
	pop %_ASM_CX
	pop %_ASM_AX
	pop %_ASM_BP

	RET
SYM_FUNC_END(vmread_error_trampoline)
330
/*
 * vmx_do_interrupt_nmi_irqoff - call an IRQ/NMI handler on a synthetic
 * interrupt stack frame.
 *
 * _ASM_ARG1 holds the address that is called (via CALL_NOSPEC); the callee is
 * expected to finish with IRET rather than RET, see the note after the call.
 */
SYM_FUNC_START(vmx_do_interrupt_nmi_irqoff)
	/*
	 * Always set up a frame pointer.  On x86-64 two instructions would be
	 * needed anyway to get the correct RSP onto the stack, and keeping the
	 * old stack pointer in RBP allows RSP to be restored from it later,
	 * which keeps objtool happy (see below).
	 */
	push %_ASM_BP
	mov %_ASM_SP, %_ASM_BP

#ifdef CONFIG_X86_64
	/*
	 * Emulate the CPU: align RSP to a 16-byte boundary, then start the
	 * synthetic IRQ/NMI frame with SS and the pre-alignment stack pointer
	 * (still held in RBP).
	 */
	and  $-16, %_ASM_SP
	push $__KERNEL_DS
	push %_ASM_BP
#endif
	/* Flags and CS; the CALL below pushes the return address on top. */
	pushf
	push $__KERNEL_CS
	CALL_NOSPEC _ASM_ARG1

	/*
	 * IRET in the callee has already put RSP back where it belongs, but
	 * objtool has no idea the callee returns with IRET and would believe
	 * the stack is getting walloped.  Reload RSP from RBP explicitly to
	 * keep it quiet; an unwind hint is not a good alternative because of
	 * the dynamic alignment done on x86-64.
	 */
	mov %_ASM_BP, %_ASM_SP
	pop %_ASM_BP
	RET
SYM_FUNC_END(vmx_do_interrupt_nmi_irqoff)
363