/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright 2019 Joyent, Inc.
 * Copyright 2024 MNX Cloud, Inc.
 */

	.file	"retpoline.s"

/*
 * This file implements the various hooks that are needed for retpolines and
 * return stack buffer (RSB) stuffing. For more information, please see the
 * 'Speculative Execution CPU Side Channel Security' section of the
 * uts/i86pc/os/cpuid.c big theory statement.
 */

#include <sys/asm_linkage.h>
#include <sys/x86_archext.h>

#if defined(__amd64)

/*
 * This macro generates the default retpoline entry point that the compiler
 * expects. It implements the expected retpoline form.
 */
#define	RETPOLINE_MKTHUNK(reg) \
	ENTRY(__x86_indirect_thunk_##reg)	\
	call	2f;				\
1:						\
	pause;					\
	lfence;					\
	jmp	1b;				\
2:						\
	movq	%##reg, (%rsp);		\
	ret;					\
	SET_SIZE(__x86_indirect_thunk_##reg)
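
/*
 * As an illustrative sketch (simply the macro above written out, not
 * additional code), RETPOLINE_MKTHUNK(rax) expands to approximately:
 *
 *	ENTRY(__x86_indirect_thunk_rax)
 *	call	2f
 * 1:
 *	pause
 *	lfence
 *	jmp	1b
 * 2:
 *	movq	%rax, (%rsp)
 *	ret
 *	SET_SIZE(__x86_indirect_thunk_rax)
 *
 * A retpoline-aware compiler (e.g. gcc with -mindirect-branch=thunk-extern)
 * replaces an indirect branch such as 'call *%rax' with a call to
 * __x86_indirect_thunk_rax. The thunk's 'call 2f' pushes a return address
 * whose speculative consumption is trapped in the pause/lfence loop at 1:,
 * while the architectural path overwrites that return address with the real
 * target in %rax and returns to it.
 */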

/*
 * This macro generates the default retpoline form. It exists in addition to
 * the thunk so that, if we need to restore the default retpoline behavior to
 * the thunk, we can.
 */
#define	RETPOLINE_MKGENERIC(reg) \
	ENTRY(__x86_indirect_thunk_gen_##reg)	\
	call	2f;				\
1:						\
	pause;					\
	lfence;					\
	jmp	1b;				\
2:						\
	movq	%##reg, (%rsp);		\
	ret;					\
	SET_SIZE(__x86_indirect_thunk_gen_##reg)

/*
 * This macro generates the no-op form of the retpoline, which will be used
 * if we need to disable retpolines, either because we have enhanced IBRS or
 * because we have been asked to disable mitigations.
 */
#define	RETPOLINE_MKJUMP(reg)			\
	ENTRY(__x86_indirect_thunk_jmp_##reg)	\
	jmp	*%##reg;			\
	SET_SIZE(__x86_indirect_thunk_jmp_##reg)
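
/*
 * For comparison, and as a sketch only, RETPOLINE_MKJUMP(rax) is simply:
 *
 *	ENTRY(__x86_indirect_thunk_jmp_rax)
 *	jmp	*%rax
 *	SET_SIZE(__x86_indirect_thunk_jmp_rax)
 *
 * i.e. a plain indirect jump with no speculation trap. When retpolines are
 * unnecessary or undesired, the default thunks can be given this behavior
 * instead; see the cpuid.c discussion referenced above for how that decision
 * is made.
 */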

	RETPOLINE_MKTHUNK(rax)
	RETPOLINE_MKTHUNK(rbx)
	RETPOLINE_MKTHUNK(rcx)
	RETPOLINE_MKTHUNK(rdx)
	RETPOLINE_MKTHUNK(rdi)
	RETPOLINE_MKTHUNK(rsi)
	RETPOLINE_MKTHUNK(rbp)
	RETPOLINE_MKTHUNK(r8)
	RETPOLINE_MKTHUNK(r9)
	RETPOLINE_MKTHUNK(r10)
	RETPOLINE_MKTHUNK(r11)
	RETPOLINE_MKTHUNK(r12)
	RETPOLINE_MKTHUNK(r13)
	RETPOLINE_MKTHUNK(r14)
	RETPOLINE_MKTHUNK(r15)

	RETPOLINE_MKGENERIC(rax)
	RETPOLINE_MKGENERIC(rbx)
	RETPOLINE_MKGENERIC(rcx)
	RETPOLINE_MKGENERIC(rdx)
	RETPOLINE_MKGENERIC(rdi)
	RETPOLINE_MKGENERIC(rsi)
	RETPOLINE_MKGENERIC(rbp)
	RETPOLINE_MKGENERIC(r8)
	RETPOLINE_MKGENERIC(r9)
	RETPOLINE_MKGENERIC(r10)
	RETPOLINE_MKGENERIC(r11)
	RETPOLINE_MKGENERIC(r12)
	RETPOLINE_MKGENERIC(r13)
	RETPOLINE_MKGENERIC(r14)
	RETPOLINE_MKGENERIC(r15)

	RETPOLINE_MKJUMP(rax)
	RETPOLINE_MKJUMP(rbx)
	RETPOLINE_MKJUMP(rcx)
	RETPOLINE_MKJUMP(rdx)
	RETPOLINE_MKJUMP(rdi)
	RETPOLINE_MKJUMP(rsi)
	RETPOLINE_MKJUMP(rbp)
	RETPOLINE_MKJUMP(r8)
	RETPOLINE_MKJUMP(r9)
	RETPOLINE_MKJUMP(r10)
	RETPOLINE_MKJUMP(r11)
	RETPOLINE_MKJUMP(r12)
	RETPOLINE_MKJUMP(r13)
	RETPOLINE_MKJUMP(r14)
	RETPOLINE_MKJUMP(r15)

	/*
	 * The x86_rsb_stuff{,_vmexit} functions can be called from pretty
	 * arbitrary contexts. It's much easier for us to save and restore all
	 * the registers we touch rather than clobber them for callers. You
	 * must preserve this property or the system will panic at best.
	 *
	 * There are two entry points because the need to stuff the RSB on
	 * Intel depends greatly on factors that differ between the VMEXIT
	 * case and the other context-switching cases.
	 *
	 * See cpuid.c's cpuid_patch_rsb() for where the two entry points'
	 * NOPs actually get patched with one-byte RETs as needed, and for the
	 * rules we use to determine which entry point gets disabled with a
	 * RET and which keeps its NOP and proceeds to the stuffing sequence.
	 */
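	/*
	 * As a sketch of the patching described above: each entry point
	 * begins with a one-byte NOP of its own, so, for example, disabling
	 * only the VMEXIT variant would leave the code behaving roughly as:
	 *
	 *	x86_rsb_stuff_vmexit:
	 *		ret	(patched; VMEXIT callers return immediately)
	 *	x86_rsb_stuff:
	 *		nop	(falls through to the stuffing sequence)
	 *		pushq	%rdi
	 *		...
	 */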
	ENTRY_NP(x86_rsb_stuff_vmexit)
	nop
	ALTENTRY(x86_rsb_stuff)
	nop
	pushq	%rdi
	pushq	%rax
	movl	$16, %edi
	movq	%rsp, %rax
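	/*
	 * Each iteration below executes two forward calls whose return
	 * addresses are pushed onto the stack and the RSB but are never
	 * architecturally consumed; a mispredicted return instead lands in
	 * the pause/call trap at the 1: labels. Sixteen iterations of two
	 * calls each fill 32 RSB entries, and restoring %rsp from %rax at
	 * the end discards the accumulated return addresses.
	 */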
rsb_loop:
	call	2f
1:
	pause
	call	1b
2:
	call	2f
1:
	pause
	call	1b
2:
	subl	$1, %edi
	jnz	rsb_loop
	movq	%rax, %rsp
	popq	%rax
	popq	%rdi
	ret
	SET_SIZE(x86_rsb_stuff)
	SET_SIZE(x86_rsb_stuff_vmexit)

	/*
	 * The x86_bhb_clear() function is similar to x86_rsb_stuff(),
	 * including its reasons for conservative register preservation, but
	 * it clears branch history with a software sequence from this
	 * document (pardon the long URL):
	 */
	/* BEGIN CSTYLED */
	/*
	 * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/branch-history-injection.html
	 */
	/* END CSTYLED */
	/*
	 * The patchable spot is a NOP, which can be patched with a RET if the
	 * CPU does not need this sequence (because it is either too old,
	 * otherwise mitigated, or actually fixed; see cpuid.c).
	 */
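	/*
	 * In other words, as with x86_rsb_stuff above, when the sequence is
	 * unnecessary the leading NOP is patched so that the function
	 * effectively becomes:
	 *
	 *	x86_bhb_clear:
	 *		ret
	 */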
	ENTRY_NP(x86_bhb_clear)
	nop
	pushq	%rcx
	pushq	%rax
	pushq	%rbx
	movq	%rsp, %rbx

	/* INTEL-PROVIDED SEQUENCE START */
	movl	$5, %ecx
	call	1f
	jmp	5f
	.align	64
1:
	call	2f
	ret
	.align	64
2:
	movl	$5, %eax
3:
	jmp	4f
	nop
4:
	sub	$1, %eax
	jnz	3b
	sub	$1, %ecx
	jnz	1b
	ret
5:
	lfence
	/* INTEL-PROVIDED SEQUENCE FINISH */

	movq	%rbx, %rsp
	popq	%rbx
	popq	%rax
	popq	%rcx
	ret
	SET_SIZE(x86_bhb_clear)

#elif defined(__i386)

/*
 * While the kernel is 64-bit only, dboot is still 32-bit, so only a limited
 * number of variants are used for 32-bit. However, as dboot is short-lived
 * and uses them sparingly, we only do the full variant and do not have an
 * AMD-specific version.
 */

#define	RETPOLINE_MKTHUNK(reg) \
	ENTRY(__x86_indirect_thunk_##reg)	\
	call	2f;				\
1:						\
	pause;					\
	lfence;					\
	jmp	1b;				\
2:						\
	movl	%##reg, (%esp);		\
	ret;					\
	SET_SIZE(__x86_indirect_thunk_##reg)

	RETPOLINE_MKTHUNK(edi)
	RETPOLINE_MKTHUNK(eax)

#else
#error	"Your architecture is in another castle."
#endif