xref: /illumos-gate/usr/src/uts/intel/ml/float.S (revision 5d9d9091f564c198a760790b0bfa72c44e17912b)
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
21
/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2018, Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 */
27
/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
/*        All Rights Reserved   */
31
/*      Copyright (c) 1987, 1988 Microsoft Corporation  */
/*        All Rights Reserved   */
34
/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */
39
40#include <sys/asm_linkage.h>
41#include <sys/asm_misc.h>
42#include <sys/regset.h>
43#include <sys/privregs.h>
44#include <sys/x86_archext.h>
45
46#include "assym.h"
47
/*
 * fpu_initial_probe: probe for x87 hardware.
 *
 * Takes no arguments. Issues fninit, stores the x87 status word into %ax
 * and returns its low byte zero-extended in %eax.
 *
 * Returns zero if x87 "chip" is present(!)
 */
	ENTRY_NP(fpu_initial_probe)
	CLTS			/* macro from outside this file; presumably */
				/* clears CR0.TS so the FPU insns don't trap */
	fninit			/* initialize the x87 unit */
	fnstsw	%ax		/* %ax = x87 status word after the init */
	movzbl	%al, %eax	/* return only the low 8 bits of the status */
	ret
	SET_SIZE(fpu_initial_probe)
58
/*
 * fxsave_insn: execute a bare fxsaveq.
 *
 *   %rdi - address of the buffer that fxsaveq stores into
 *
 * Nothing else is done here: CR0.TS is not touched and the FPU is not
 * reinitialized afterwards.
 */
	ENTRY_NP(fxsave_insn)
	fxsaveq (%rdi)
	ret
	SET_SIZE(fxsave_insn)
63
/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch.
 */
68
/*
 * These three functions define the Intel "xsave" handling for CPUs with
 * different features. Newer AMD CPUs can also use these functions. See the
 * 'exception pointers' comment below.
 */
/*
 * fpxsave_ctxt: context-switch save using fxsave.
 *
 *   %rdi - pointer to a struct fpu_ctx
 *
 * If the flags word at FPU_CTX_FPU_FLAGS is not exactly FPU_EN, return
 * without saving anything. Otherwise mark the context FPU_VALID|FPU_EN,
 * fxsave into the area addressed by the FPU_CTX_FPU_REGS pointer, and use
 * STTS so that the next FPU touch traps.
 *
 * %rdi is overwritten with the save-area pointer; %rsi is handed to the
 * STTS macro (presumably as a scratch register).
 */
	ENTRY_NP(fpxsave_ctxt)	/* %rdi is a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f		/* flags != FPU_EN: nothing to save */
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
	fxsaveq	(%rdi)
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)
84
/*
 * xsave_ctxt: context-switch save using xsave.
 *
 *   %rdi - pointer to a struct fpu_ctx
 *
 * If the flags word at FPU_CTX_FPU_FLAGS is not exactly FPU_EN, return
 * without saving anything. Otherwise mark the context FPU_VALID|FPU_EN,
 * load the 64-bit value at FPU_CTX_FPU_XSAVE_MASK into %edx:%eax as the
 * xsave component mask, xsave into the area addressed by the
 * FPU_CTX_FPU_REGS pointer, and use STTS so the next FPU touch traps.
 *
 * %eax, %edx and %rsi are overwritten.
 */
	ENTRY_NP(xsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f		/* flags != FPU_EN: nothing to save */
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsave	(%rsi)
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)
96
/*
 * xsaveopt_ctxt: context-switch save using xsaveopt.
 *
 *   %rdi - pointer to a struct fpu_ctx
 *
 * If the flags word at FPU_CTX_FPU_FLAGS is not exactly FPU_EN, return
 * without saving anything. Otherwise mark the context FPU_VALID|FPU_EN,
 * load the 64-bit value at FPU_CTX_FPU_XSAVE_MASK into %edx:%eax as the
 * component mask, xsaveopt into the area addressed by the
 * FPU_CTX_FPU_REGS pointer, and use STTS so the next FPU touch traps.
 *
 * %eax, %edx and %rsi are overwritten.
 */
	ENTRY_NP(xsaveopt_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f		/* flags != FPU_EN: nothing to save */
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsaveopt (%rsi)
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsaveopt_ctxt)
108
/*
 * On certain AMD processors, the "exception pointers" (i.e. the last
 * instruction pointer, last data pointer, and last opcode) are saved by the
 * fxsave, xsave or xsaveopt instruction ONLY if the exception summary bit is
 * set.
 *
 * On newer CPUs, AMD has changed their behavior to mirror the Intel behavior.
 * We can detect this via an AMD specific cpuid feature bit
 * (CPUID_AMD_EBX_ERR_PTR_ZERO) and use the simpler Intel-oriented functions.
 * Otherwise we use these more complex functions on AMD CPUs. All three follow
 * the same logic after the xsave* instruction.
 */
/*
 * fpxsave_excp_clr_ctxt: context-switch save using fxsave, followed by a
 * scrub of the x87 exception pointers.
 *
 *   %rdi - pointer to a struct fpu_ctx
 *
 * If the flags word at FPU_CTX_FPU_FLAGS is not exactly FPU_EN, return
 * without saving anything. Otherwise mark the context FPU_VALID|FPU_EN and
 * fxsave into the area addressed by the FPU_CTX_FPU_REGS pointer. Then, if
 * bit 7 (ES) of the saved status word is set, clear pending x87
 * exceptions; free %st(7) and perform a dummy integer load so the
 * exception pointers are overwritten; finally use STTS so the next FPU
 * touch traps.
 *
 * %rdi is overwritten with the save-area pointer; %rsi is handed to STTS.
 */
	ENTRY_NP(fpxsave_excp_clr_ctxt)	/* %rdi is a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f		/* flags != FPU_EN: nothing to save */
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
	fxsaveq	(%rdi)
	/*
	 * To ensure that we don't leak these values into the next context
	 * on the cpu, we could just issue an fninit here, but that's
	 * rather slow and so we issue an instruction sequence that
	 * clears them more quickly, if a little obscurely.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_excp_clr_ctxt)
143
/*
 * xsave_excp_clr_ctxt: context-switch save using xsave, followed by a
 * scrub of the x87 exception pointers.
 *
 *   %rdi - pointer to a struct fpu_ctx
 *
 * If the flags word at FPU_CTX_FPU_FLAGS is not exactly FPU_EN, return
 * without saving anything. Otherwise mark the context FPU_VALID|FPU_EN,
 * load the 64-bit value at FPU_CTX_FPU_XSAVE_MASK into %edx:%eax and xsave
 * into the area addressed by the FPU_CTX_FPU_REGS pointer. Then, if bit 7
 * (ES) of the saved status word is set, clear pending x87 exceptions; free
 * %st(7) and perform a dummy integer load so the exception pointers are
 * overwritten; finally use STTS so the next FPU touch traps.
 *
 * %eax, %edx and %rsi are overwritten.
 */
	ENTRY_NP(xsave_excp_clr_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f		/* flags != FPU_EN: nothing to save */
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsave	(%rsi)
	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip) /* dummy load changes all excp. pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_excp_clr_ctxt)
160
/*
 * xsaveopt_excp_clr_ctxt: context-switch save using xsaveopt, followed by
 * a scrub of the x87 exception pointers.
 *
 *   %rdi - pointer to a struct fpu_ctx
 *
 * If the flags word at FPU_CTX_FPU_FLAGS is not exactly FPU_EN, return
 * without saving anything. Otherwise mark the context FPU_VALID|FPU_EN,
 * load the 64-bit value at FPU_CTX_FPU_XSAVE_MASK into %edx:%eax and
 * xsaveopt into the area addressed by the FPU_CTX_FPU_REGS pointer. Then,
 * if bit 7 (ES) of the saved status word is set, clear pending x87
 * exceptions; free %st(7) and perform a dummy integer load so the
 * exception pointers are overwritten; finally use STTS so the next FPU
 * touch traps.
 *
 * %eax, %edx and %rsi are overwritten.
 */
	ENTRY_NP(xsaveopt_excp_clr_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f		/* flags != FPU_EN: nothing to save */
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rsi /* fpu_regs.kfpu_u.kfpu_xs ptr */
	xsaveopt (%rsi)
	btw	$7, FXSAVE_STATE_FSW(%rsi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip) /* dummy load changes all excp. pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsaveopt_excp_clr_ctxt)
177
/*
 * Zero constant used as the source of the dummy `fildl` loads in the
 * *_excp_clr_ctxt routines. Eight zero bytes are emitted at an 8-byte
 * aligned address; fildl itself reads only the first four.
 */
	.align	8
.fpzero_const:
	.4byte	0x0
	.4byte	0x0
182
183
/*
 * fpxsave: save FPU state with fxsave, then reset and disable the FPU.
 *
 *   %rdi - address of the buffer that fxsaveq stores into
 *
 * After the save, the x87 unit is reinitialized with fninit and STTS sets
 * the TS bit in %cr0. %rdi is handed to STTS once the save is done, so its
 * value on return should not be relied on.
 */
	ENTRY_NP(fpxsave)
	CLTS				/* macro; presumably clears CR0.TS */
	fxsaveq (%rdi)
	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)
191
/*
 * xsave: save FPU state with xsave, then reset and disable the FPU.
 *
 *   %rdi - address of the area that xsave stores into
 *   %rsi - 64-bit component bit-vector mask
 *
 * The mask is split into %edx:%eax as the xsave instruction requires.
 * After the save, the x87 unit is reinitialized with fninit and STTS sets
 * the TS bit in %cr0. %eax and %rdx are overwritten; %rdi is handed to
 * STTS once the save is done.
 */
	ENTRY_NP(xsave)
	CLTS				/* macro; presumably clears CR0.TS */
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = upper 32 bits of the mask */
	xsave	(%rdi)

	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)
203
/*
 * xsaveopt: save FPU state with xsaveopt, then reset and disable the FPU.
 *
 *   %rdi - address of the area that xsaveopt stores into
 *   %rsi - 64-bit component bit-vector mask
 *
 * The mask is split into %edx:%eax as the instruction requires. After the
 * save, the x87 unit is reinitialized with fninit and STTS sets the TS bit
 * in %cr0. %eax and %rdx are overwritten; %rdi is handed to STTS once the
 * save is done.
 */
	ENTRY_NP(xsaveopt)
	CLTS				/* macro; presumably clears CR0.TS */
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = upper 32 bits of the mask */
	xsaveopt (%rdi)

	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsaveopt)
215
/*
 * These functions are used when restoring the FPU as part of the epilogue of a
 * context switch.
 */
220
/*
 * fpxrestore_ctxt: context-switch restore using fxrstor.
 *
 *   %rdi - pointer to a struct fpu_ctx
 *
 * If the flags word at FPU_CTX_FPU_FLAGS is not exactly FPU_EN|FPU_VALID,
 * return without restoring anything. Otherwise rewrite the flags to FPU_EN
 * (dropping FPU_VALID), issue CLTS, and fxrstor from the area addressed by
 * the FPU_CTX_FPU_REGS pointer.
 *
 * %rdi is overwritten with the save-area pointer.
 */
	ENTRY(fpxrestore_ctxt)
	cmpl	$_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f		/* no valid saved state: nothing to restore */
	movl	$_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_fx ptr */
	CLTS
	fxrstorq	(%rdi)
1:
	ret
	SET_SIZE(fpxrestore_ctxt)
231
/*
 * xrestore_ctxt: context-switch restore using xrstor.
 *
 *   %rdi - pointer to a struct fpu_ctx
 *
 * If the flags word at FPU_CTX_FPU_FLAGS is not exactly FPU_EN|FPU_VALID,
 * return without restoring anything. Otherwise rewrite the flags to FPU_EN
 * (dropping FPU_VALID), load the 64-bit value at FPU_CTX_FPU_XSAVE_MASK
 * into %edx:%eax as the component mask, issue CLTS, and xrstor from the
 * area addressed by the FPU_CTX_FPU_REGS pointer.
 *
 * %eax, %edx and %rdi are overwritten.
 */
	ENTRY(xrestore_ctxt)
	cmpl	$_CONST(FPU_EN|FPU_VALID), FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f		/* no valid saved state: nothing to restore */
	movl	$_CONST(FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax /* xsave flags in EDX:EAX */
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	movq	FPU_CTX_FPU_REGS(%rdi), %rdi /* fpu_regs.kfpu_u.kfpu_xs ptr */
	CLTS
	xrstor	(%rdi)
1:
	ret
	SET_SIZE(xrestore_ctxt)
244
245
/*
 * fpxrestore: unconditionally load FPU state with fxrstor.
 *
 *   %rdi - address of the area that fxrstorq loads from
 *
 * CLTS is issued first; TS is not set again before returning.
 */
	ENTRY_NP(fpxrestore)
	CLTS
	fxrstorq	(%rdi)
	ret
	SET_SIZE(fpxrestore)
251
/*
 * xrestore: unconditionally load FPU state with xrstor.
 *
 *   %rdi - address of the area that xrstor loads from
 *   %rsi - 64-bit component bit-vector mask
 *
 * CLTS is issued first and the mask is split into %edx:%eax as the
 * instruction requires. TS is not set again before returning. %eax and
 * %rdx are overwritten.
 */
	ENTRY_NP(xrestore)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = upper 32 bits of the mask */
	xrstor	(%rdi)
	ret
	SET_SIZE(xrestore)
260
/*
 * Disable the floating point unit.
 *
 * Takes no arguments. The only work done is the STTS macro, which is given
 * %rdi (presumably as a scratch register, so %rdi should be treated as
 * clobbered).
 */

	ENTRY_NP(fpdisable)
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)
269
/*
 * Initialize the fpu hardware.
 *
 * Takes no arguments. After CLTS, the global fp_save_mech selects how the
 * clean initial state is loaded:
 *
 *   - not FP_XSAVE: fxrstor from sse_initial.
 *   - FP_XSAVE:     xrstor from avx_initial with a component mask in
 *                   %edx:%eax of XFEATURE_LEGACY_FP | XFEATURE_SSE, plus
 *                   XFEATURE_AVX when the X86FSET_AVX bit tests as set in
 *                   x86_featureset. %edx is always zero.
 *
 * %rax, %rcx and %edx are overwritten depending on the path taken.
 */

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_XSAVE, fp_save_mech
	je	1f

	/* fxsave */
	leaq	sse_initial(%rip), %rax
	fxrstorq	(%rax)			/* load clean initial state */
	ret

1:	/* xsave */
	leaq	avx_initial(%rip), %rcx
	xorl	%edx, %edx		/* upper half of the mask is zero */
	movl	$XFEATURE_AVX, %eax	/* tentatively request AVX state */
	/*
	 * NOTE(review): with an immediate bit offset, btl on a memory operand
	 * only addresses bits 0-31 of the dword at x86_featureset, so this
	 * relies on X86FSET_AVX being less than 32 - confirm.
	 */
	btl	$X86FSET_AVX, x86_featureset	/* CF = AVX feature bit */
	cmovael	%edx, %eax		/* CF clear (no AVX): drop the AVX bit */
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	xrstor (%rcx)			/* load clean initial state */
	ret
	SET_SIZE(fpinit)
294
/*
 * Clears FPU exception state.
 * Returns the FP status word.
 *
 * Takes no arguments. The status word is captured before fnclex runs, so
 * the value returned in %eax (zero-extended from 16 bits) is the status
 * as it stood before the exceptions were cleared.
 */

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax		/* upper bits of the return value = 0 */
	fnstsw	%ax			/* %ax = x87 status word */
	fnclex				/* clear the x87 exception flags */
	ret
	SET_SIZE(fperr_reset)
307
/*
 * fpxerr_reset: clear the exception flag bits in MXCSR.
 *
 * Takes no arguments. Returns in %eax the MXCSR value as it was before the
 * bits in SSE_MXCSR_EFLAGS were cleared. A 16-byte stack temporary is used
 * because stmxcsr/ldmxcsr only take a memory operand.
 */
	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)			/* temp = current MXCSR */
	movl	(%rsp), %eax		/* return value: MXCSR before clearing */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* clear processor exceptions */
	leave
	ret
	SET_SIZE(fpxerr_reset)
320
/*
 * fpgetcwsw: return the x87 control and status words packed in %eax.
 *
 * Takes no arguments. The status word is stored at offset 0 and the
 * control word at offset 2 of a stack temporary, which is then read back
 * as one 32-bit value: bits 0-15 of %eax hold the status word and bits
 * 16-31 hold the control word.
 */
	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space	*/
	CLTS
	fnstsw	(%rsp)			/* store the status word	*/
	fnstcw	2(%rsp)			/* store the control word	*/
	movl	(%rsp), %eax		/* put both in %eax		*/
	leave
	ret
	SET_SIZE(fpgetcwsw)
332
/*
 * Returns the MXCSR register.
 *
 * Takes no arguments. MXCSR is stored to a stack temporary (stmxcsr only
 * takes a memory operand) and returned in %eax; it is not modified.
 */

	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)			/* temp = current MXCSR */
	movl	(%rsp), %eax		/* return it */
	leave
	ret
	SET_SIZE(fpgetmxcsr)
347
348