xref: /titanic_50/usr/src/uts/intel/ia32/ml/float.s (revision a24e89c4a1eec8361718d94a6275e6720643284e)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
28/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
29/*        All Rights Reserved   */
30
31/*      Copyright (c) 1987, 1988 Microsoft Corporation  */
32/*        All Rights Reserved   */
33
34#pragma ident	"%Z%%M%	%I%	%E% SMI"
35
36#include <sys/asm_linkage.h>
37#include <sys/asm_misc.h>
38#include <sys/regset.h>
39#include <sys/privregs.h>
40#include <sys/x86_archext.h>
41
42#if defined(__lint)
43#include <sys/types.h>
44#include <sys/fp.h>
45#else
46#include "assym.h"
47#endif
48
49#if defined(__lint)
50
/*
 * lint-only C stub: supplies a prototype and a trivial body for
 * fpu_initial_probe() when __lint is defined.  The real routine is the
 * assembly in the #else branch of this conditional.
 */
51uint_t
52fpu_initial_probe(void)
53{ return (0); }
54
55#else	/* __lint */
56
57	/*
58	 * Returns zero if x87 "chip" is present(!)
	 *
	 * uint_t fpu_initial_probe(void)
	 *
	 * Runs fninit, reads the x87 status word back into %ax and returns
	 * its low byte, zero-extended, in %eax.  fninit resets the status
	 * word to zero on a working x87, which is why a zero return is the
	 * "FPU present" answer.
59	 */
60	ENTRY_NP(fpu_initial_probe)
61	CLTS			/* macro; presumably clears CR0.TS so FPU insns don't trap */
62	fninit
63	fnstsw	%ax		/* no-wait store of the status word */
64	movzbl	%al, %eax	/* return only the low byte of the status word */
65	ret
66	SET_SIZE(fpu_initial_probe)
67
68#endif	/* __lint */
69
70#if defined(__lint)
71
/*
 * lint-only C stub for fxsave_insn(); the real routine is the assembly
 * in the #else branch of this __lint conditional.
 */
72/*ARGSUSED*/
73void
74fxsave_insn(struct fxsave_state *fx)
75{}
76
77#else	/* __lint */
78
79#if defined(__amd64)
80
	/*
	 * void fxsave_insn(struct fxsave_state *fx)	(amd64)
	 *
	 * Saves the FPU state into the buffer whose address arrives in %rdi.
	 * FXSAVEQ is a macro from an included header (presumably the 64-bit
	 * form of fxsave).  This routine does not touch CR0.TS and does not
	 * reinitialize the FPU afterwards.
	 */
81	ENTRY_NP(fxsave_insn)
82	FXSAVEQ	((%rdi))
83	ret
84	SET_SIZE(fxsave_insn)
85
86#elif defined(__i386)
87
	/*
	 * void fxsave_insn(struct fxsave_state *fx)	(i386)
	 *
	 * Saves the FPU state with fxsave into the buffer whose address is
	 * the first stack argument.  CR0.TS is not touched and the FPU is
	 * not reinitialized afterwards.  Clobbers %eax.
	 */
88	ENTRY_NP(fxsave_insn)
89	movl	4(%esp), %eax		/* %eax = fx */
90	fxsave	(%eax)
91	ret
92	SET_SIZE(fxsave_insn)
93
94#endif
95
96#endif	/* __lint */
97
98#if defined(__i386)
99
100/*
101 * If (num1/num2 > num1/num3) the FPU has the FDIV bug.
102 */
103
104#if defined(__lint)
105
/*
 * lint-only C stub for fpu_probe_pentium_fdivbug(); the real routine is
 * the assembly in the #else branch of this __lint conditional.
 */
106int
107fpu_probe_pentium_fdivbug(void)
108{ return (0); }
109
110#else	/* __lint */
111
	/*
	 * int fpu_probe_pentium_fdivbug(void)	(i386 only)
	 *
	 * Divides .num1 by .num2 and by .num3 on the x87 and compares the
	 * two quotients.  Returns 1 in %eax when the comparison leaves the
	 * carry flag set (the flawed-FDIV case) and 0 otherwise.  The x87
	 * stack is left balanced: two loads, one fcompp.
	 */
112	ENTRY_NP(fpu_probe_pentium_fdivbug)
113	fldl	.num1
114	fldl	.num2			/* %st = num2, %st(1) = num1 */
115	fdivr	%st(1), %st		/* intended: %st = num1 / num2 */
116	fxch	%st(1)			/* %st = num1, %st(1) = num1 / num2 */
117	fdivl	.num3			/* %st = num1 / num3 */
118	fcompp				/* compare %st with %st(1), pop both */
119	fstsw	%ax
120	sahf				/* x87 condition codes -> EFLAGS */
121	jae	0f			/* carry clear: no bug detected */
122	movl	$1, %eax
123	ret
124
1250:	xorl	%eax, %eax
126	ret
127
	/*
	 * 64-bit floating point operands, low 32-bit word first.  They sit
	 * in the instruction stream after the final ret and are only ever
	 * read as data by the fldl/fdivl instructions of this routine.
	 */
128	.align	4
129.num1:	.4byte	0xbce4217d	/* 4.999999 */
130	.4byte	0x4013ffff
131.num2:	.4byte	0x0		/* 15.0 */
132	.4byte	0x402e0000
133.num3:	.4byte	0xde7210bf	/* 14.999999 */
134	.4byte	0x402dffff
135	SET_SIZE(fpu_probe_pentium_fdivbug)
136
137#endif	/* __lint */
138
139/*
140 * To cope with processors that do not implement fxsave/fxrstor
141 * instructions, patch hot paths in the kernel to use them only
142 * when that feature has been detected.
143 */
144
145#if defined(__lint)
146
/*
 * lint-only C stubs for patch_sse() and patch_sse2(); the real routines
 * are the assembly in the #else branch of this __lint conditional.
 */
147void
148patch_sse(void)
149{}
150
151void
152patch_sse2(void)
153{}
154
155#else	/* __lint */
156
	/*
	 * void patch_sse(void)		(i386 only)
	 *
	 * Hot-patches two kernel code sites, as described by the per-site
	 * comments below:
	 *   _patch_fxrstor_ebx: "frstor (%ebx); nop" -> "fxrstor (%ebx)"
	 *   _patch_sfence_ret:  "lock; xorl $0, (%esp)" -> "sfence; ret"
	 * The _HOT_PATCH* macros come from an included header; their
	 * arguments appear to be (replacement template, patch site, byte
	 * count) -- confirm against the macro definition.
	 *
	 * The labelled instructions after the ret are not reached by
	 * fall-through; they serve as the replacement templates.
	 * NOTE(review): _ldmxcsr_ebx_insn has no matching _HOT_PATCH in
	 * this routine -- confirm whether it is used elsewhere or is dead.
	 */
157	ENTRY_NP(patch_sse)
158	_HOT_PATCH_PROLOG
159	/
160	/	frstor (%ebx); nop	-> fxrstor (%ebx)
161	/
162	_HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
163	/
164	/	lock; xorl $0, (%esp)	-> sfence; ret
165	/
166	_HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
167	_HOT_PATCH_EPILOG
168	ret
169_fxrstor_ebx_insn:			/ see ndptrap_frstor()
170	fxrstor	(%ebx)
171_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
172	ldmxcsr	(%ebx)
173_sfence_ret_insn:			/ see membar_producer()
174	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
175	ret
176	SET_SIZE(patch_sse)
177
	/*
	 * void patch_sse2(void)	(i386 only)
	 *
	 * Hot-patches the _patch_lfence_ret site so that
	 * "lock; xorl $0, (%esp)" becomes "lfence; ret" (4 bytes), per the
	 * comment below.  The _HOT_PATCH* macros come from an included
	 * header.  The template after the ret is not reached by
	 * fall-through; lfence is emitted as raw bytes.
	 */
178	ENTRY_NP(patch_sse2)
179	_HOT_PATCH_PROLOG
180	/
181	/	lock; xorl $0, (%esp)	-> lfence; ret
182	/
183	_HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
184	_HOT_PATCH_EPILOG
185	ret
186_lfence_ret_insn:			/ see membar_consumer()
187	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
188	ret
189	SET_SIZE(patch_sse2)
190
191#endif	/* __lint */
192#endif	/* __i386 */
193
194
195/*
196 * One of these routines is called from any lwp with floating
197 * point context as part of the prolog of a context switch.
198 */
199
200#if defined(__lint)
201
/*
 * lint-only C stubs for the context-switch save hooks.  The assembly
 * versions are in the #else branch of this __lint conditional; note
 * that fpnsave_ctxt() has an assembly body only in the __i386 branch.
 */
202/*ARGSUSED*/
203void
204fpxsave_ctxt(void *arg)
205{}
206
207/*ARGSUSED*/
208void
209fpnsave_ctxt(void *arg)
210{}
211
212#else	/* __lint */
213
214#if defined(__amd64)
215
216	ENTRY_NP(fpxsave_ctxt)
217	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
218	jne	1f
219
220	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
221	FXSAVEQ	(FPU_CTX_FPU_REGS(%rdi))
222
223	/*
224	 * On certain AMD processors, the "exception pointers" i.e. the last
225	 * instruction pointer, last data pointer, and last opcode
226	 * are saved by the fxsave instruction ONLY if the exception summary
227	 * bit is set.
228	 *
229	 * To ensure that we don't leak these values into the next context
230	 * on the cpu, we could just issue an fninit here, but that's
231	 * rather slow and so we issue an instruction sequence that
232	 * clears them more quickly, if a little obscurely.
233	 */
234	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
235	jnc	0f				/* jump if ES = 0 */
236	fnclex		/* clear pending x87 exceptions */
2370:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
238	fildl	.fpzero_const(%rip)
239			/* dummy load changes all exception pointers */
240	STTS(%rsi)	/* trap on next fpu touch */
2411:	rep;	ret	/* use 2 byte return instruction when branch target */
242			/* AMD Software Optimization Guide - Section 6.2 */
243	SET_SIZE(fpxsave_ctxt)
244
245#elif defined(__i386)
246
	/*
	 * void fpnsave_ctxt(void *arg)		(i386, fnsave flavour)
	 *
	 * arg is a struct fpu_ctx pointer taken from the stack.  If the
	 * context's flags word is exactly FPU_EN, the flags become
	 * FPU_VALID|FPU_EN, the x87 state is stored into the context's
	 * register area with fnsave, and STTS is issued.  For any other
	 * flags value the routine returns at once.  Clobbers %eax and the
	 * register handed to STTS (%edx).
	 */
247	ENTRY_NP(fpnsave_ctxt)
248	movl	4(%esp), %eax		/* a struct fpu_ctx */
249	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
250	jne	1f
251
252	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
253	fnsave	FPU_CTX_FPU_REGS(%eax)
254			/* (fnsave also reinitializes x87 state) */
255	STTS(%edx)	/* trap on next fpu touch */
2561:	rep;	ret	/* use 2 byte return instruction when branch target */
257			/* AMD Software Optimization Guide - Section 6.2 */
258	SET_SIZE(fpnsave_ctxt)
259
260	ENTRY_NP(fpxsave_ctxt)
261	movl	4(%esp), %eax		/* a struct fpu_ctx */
262	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
263	jne	1f
264
265	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
266	fxsave	FPU_CTX_FPU_REGS(%eax)
267			/* (see notes above about "exception pointers") */
268	btw	$7, FXSAVE_STATE_FSW(%eax)	/* Test saved ES bit */
269	jnc	0f				/* jump if ES = 0 */
270	fnclex		/* clear pending x87 exceptions */
2710:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
272	fildl	.fpzero_const
273			/* dummy load changes all exception pointers */
274	STTS(%edx)	/* trap on next fpu touch */
2751:	rep;	ret	/* use 2 byte return instruction when branch target */
276			/* AMD Software Optimization Guide - Section 6.2 */
277	SET_SIZE(fpxsave_ctxt)
278
279#endif	/* __i386 */
280
	/*
	 * Eight zero bytes, 8-byte aligned.  fpxsave_ctxt() reads the first
	 * four as a 32-bit integer zero for its dummy fildl.  Emitted for
	 * both the amd64 and i386 builds (it follows the arch conditional).
	 */
281	.align	8
282.fpzero_const:
283	.4byte	0x0
284	.4byte	0x0
285
286#endif	/* __lint */
287
288
289#if defined(__lint)
290
/*
 * lint-only C stubs for fpsave() and fpxsave().  The assembly versions
 * are in the #else branch of this __lint conditional; fpsave() has an
 * assembly body only in the __i386 branch.
 */
291/*ARGSUSED*/
292void
293fpsave(struct fnsave_state *f)
294{}
295
296/*ARGSUSED*/
297void
298fpxsave(struct fxsave_state *f)
299{}
300
301#else	/* __lint */
302
303#if defined(__amd64)
304
	/*
	 * void fpxsave(struct fxsave_state *f)		(amd64)
	 *
	 * Saves the FPU state into the buffer at %rdi with FXSAVEQ, resets
	 * the x87 with fninit, then sets CR0.TS via STTS so the FPU is
	 * disabled on return.  %rdi is handed to STTS once the save is done,
	 * i.e. after its pointer value is no longer needed.
	 */
305	ENTRY_NP(fpxsave)
306	CLTS				/* macro; presumably clears CR0.TS first */
307	FXSAVEQ	((%rdi))
308	fninit				/* clear exceptions, init x87 tags */
309	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
310	ret
311	SET_SIZE(fpxsave)
312
313#elif defined(__i386)
314
	/*
	 * void fpsave(struct fnsave_state *f)		(i386)
	 *
	 * Stores the x87 state into the buffer given as the first stack
	 * argument using fnsave, then sets CR0.TS via STTS so the FPU is
	 * disabled on return.  No explicit fninit is issued here, unlike
	 * fpxsave().  %eax is handed to STTS after the save, when its
	 * pointer value is no longer needed.
	 */
315	ENTRY_NP(fpsave)
316	CLTS				/* macro; presumably clears CR0.TS first */
317	movl	4(%esp), %eax		/* %eax = f */
318	fnsave	(%eax)
319	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
320	ret
321	SET_SIZE(fpsave)
322
	/*
	 * void fpxsave(struct fxsave_state *f)		(i386)
	 *
	 * Stores the FPU state into the buffer given as the first stack
	 * argument using fxsave, resets the x87 with fninit, then sets
	 * CR0.TS via STTS so the FPU is disabled on return.  %eax is handed
	 * to STTS after the save, when its pointer value is no longer
	 * needed.
	 */
323	ENTRY_NP(fpxsave)
324	CLTS				/* macro; presumably clears CR0.TS first */
325	movl	4(%esp), %eax		/* %eax = f */
326	fxsave	(%eax)
327	fninit				/* clear exceptions, init x87 tags */
328	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
329	ret
330	SET_SIZE(fpxsave)
331
332#endif	/* __i386 */
333#endif	/* __lint */
334
335#if defined(__lint)
336
/*
 * lint-only C stubs for fprestore() and fpxrestore().  The assembly
 * versions are in the #else branch of this __lint conditional;
 * fprestore() has an assembly body only in the __i386 branch.
 */
337/*ARGSUSED*/
338void
339fprestore(struct fnsave_state *f)
340{}
341
342/*ARGSUSED*/
343void
344fpxrestore(struct fxsave_state *f)
345{}
346
347#else	/* __lint */
348
349#if defined(__amd64)
350
	/*
	 * void fpxrestore(struct fxsave_state *f)	(amd64)
	 *
	 * Reloads the FPU state from the buffer at %rdi with FXRSTORQ (a
	 * macro from an included header).  No STTS is issued afterwards, so
	 * whatever CLTS did to CR0.TS is still in effect on return.
	 */
351	ENTRY_NP(fpxrestore)
352	CLTS			/* macro; presumably clears CR0.TS so the restore can run */
353	FXRSTORQ	((%rdi))
354	ret
355	SET_SIZE(fpxrestore)
356
357#elif defined(__i386)
358
	/*
	 * void fprestore(struct fnsave_state *f)	(i386)
	 *
	 * Reloads the x87 state with frstor from the buffer given as the
	 * first stack argument.  No STTS is issued afterwards.
	 * Clobbers %eax.
	 */
359	ENTRY_NP(fprestore)
360	CLTS			/* macro; presumably clears CR0.TS so the restore can run */
361	movl	4(%esp), %eax	/* %eax = f */
362	frstor	(%eax)
363	ret
364	SET_SIZE(fprestore)
365
	/*
	 * void fpxrestore(struct fxsave_state *f)	(i386)
	 *
	 * Reloads the FPU state with fxrstor from the buffer given as the
	 * first stack argument.  No STTS is issued afterwards.
	 * Clobbers %eax.
	 */
366	ENTRY_NP(fpxrestore)
367	CLTS			/* macro; presumably clears CR0.TS so the restore can run */
368	movl	4(%esp), %eax	/* %eax = f */
369	fxrstor	(%eax)
370	ret
371	SET_SIZE(fpxrestore)
372
373#endif	/* __i386 */
374#endif	/* __lint */
375
376/*
377 * Disable the floating point unit.
378 */
379
380#if defined(__lint)
381
/*
 * lint-only C stub for fpdisable(); the real routine is the assembly in
 * the #else branch of this __lint conditional.
 */
382void
383fpdisable(void)
384{}
385
386#else	/* __lint */
387
388#if defined(__amd64)
389
	/*
	 * void fpdisable(void)		(amd64)
	 *
	 * Sets CR0.TS through the STTS macro and returns; no FPU state is
	 * read or written.  %rdi is the register handed to STTS (presumably
	 * as scratch -- confirm against the macro definition).
	 */
390	ENTRY_NP(fpdisable)
391	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
392	ret
393	SET_SIZE(fpdisable)
394
395#elif defined(__i386)
396
	/*
	 * void fpdisable(void)		(i386)
	 *
	 * Issues the STTS macro and returns; no FPU state is read or
	 * written.  The amd64 twin of this routine documents STTS as setting
	 * the TS bit in %cr0.  %eax is the register handed to STTS
	 * (presumably as scratch -- confirm against the macro definition).
	 */
397	ENTRY_NP(fpdisable)
398	STTS(%eax)
399	ret
400	SET_SIZE(fpdisable)
401
402#endif	/* __i386 */
403#endif	/* __lint */
404
405/*
406 * Initialize the fpu hardware.
407 */
408
409#if defined(__lint)
410
/*
 * lint-only C stub for fpinit(); the real routine is the assembly in
 * the #else branch of this __lint conditional.
 */
411void
412fpinit(void)
413{}
414
415#else	/* __lint */
416
417#if defined(__amd64)
418
	/*
	 * void fpinit(void)		(amd64)
	 *
	 * Loads the FPU from the sse_initial image (defined outside this
	 * file) with FXRSTORQ.  The image is addressed %rip-relative.  No
	 * STTS is issued, so the FPU is left usable on return.
	 * Clobbers %rax.
	 */
419	ENTRY_NP(fpinit)
420	CLTS			/* macro; presumably clears CR0.TS so the restore can run */
421	leaq	sse_initial(%rip), %rax
422	FXRSTORQ	((%rax))		/* load clean initial state */
423	ret
424	SET_SIZE(fpinit)
425
426#elif defined(__i386)
427
	/*
	 * void fpinit(void)		(i386)
	 *
	 * Loads a clean initial FPU state, choosing the image by fp_kind:
	 *   fp_kind == __FP_SSE: fxrstor from sse_initial
	 *   otherwise:           fninit, then frstor from x87_initial
	 * fp_kind and both images are defined outside this file.  No STTS
	 * is issued, so the FPU is left usable on return.  Clobbers %eax.
	 */
428	ENTRY_NP(fpinit)
429	CLTS			/* macro; presumably clears CR0.TS so FPU insns can run */
430	cmpl	$__FP_SSE, fp_kind
431	je	1f
432
433	fninit
434	movl	$x87_initial, %eax
435	frstor	(%eax)			/* load clean initial state */
436	ret
4371:
438	movl	$sse_initial, %eax
439	fxrstor	(%eax)			/* load clean initial state */
440	ret
441	SET_SIZE(fpinit)
442
443#endif	/* __i386 */
444#endif	/* __lint */
445
446/*
447 * Clears FPU exception state (x87 for fperr_reset, SSE for fpxerr_reset).
448 * Returns the x87 status word / the MXCSR value as read before the clear.
449 */
450
451#if defined(__lint)
452
/*
 * lint-only C stubs for fperr_reset() and fpxerr_reset(); the real
 * routines are the assembly in the #else branch of this __lint
 * conditional.
 */
453uint32_t
454fperr_reset(void)
455{ return (0); }
456
457uint32_t
458fpxerr_reset(void)
459{ return (0); }
460
461#else	/* __lint */
462
463#if defined(__amd64)
464
	/*
	 * uint32_t fperr_reset(void)		(amd64)
	 *
	 * Returns the x87 status word in the low 16 bits of %eax (upper
	 * bits zero) as it was before the exceptions are cleared, then
	 * clears the x87 exception flags with fnclex.
	 */
465	ENTRY_NP(fperr_reset)
466	CLTS			/* macro; presumably clears CR0.TS so FPU insns can run */
467	xorl	%eax, %eax	/* zero the bits fnstsw does not write */
468	fnstsw	%ax		/* capture status before clearing */
469	fnclex
470	ret
471	SET_SIZE(fperr_reset)
472
	/*
	 * uint32_t fpxerr_reset(void)		(amd64)
	 *
	 * Returns MXCSR in %eax as it was on entry, then reloads MXCSR with
	 * the SSE_MXCSR_EFLAGS bits cleared (presumably the exception-flag
	 * bits; the mask is defined outside this file).  A 16-byte stack
	 * temporary under a standard %rbp frame holds the value.
	 */
473	ENTRY_NP(fpxerr_reset)
474	pushq	%rbp
475	movq	%rsp, %rbp
476	subq	$0x10, %rsp		/* make some temporary space */
477	CLTS
478	stmxcsr	(%rsp)
479	movl	(%rsp), %eax		/* return value: MXCSR before the clear */
480	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
481	ldmxcsr	(%rsp)			/* clear processor exceptions */
482	leave				/* drop the temporary and the frame */
483	ret
484	SET_SIZE(fpxerr_reset)
485
486#elif defined(__i386)
487
	/*
	 * uint32_t fperr_reset(void)		(i386)
	 *
	 * Returns the x87 status word in the low 16 bits of %eax (upper
	 * bits zero) as it was before the exceptions are cleared, then
	 * clears the x87 exception flags with fnclex.
	 */
488	ENTRY_NP(fperr_reset)
489	CLTS			/* macro; presumably clears CR0.TS so FPU insns can run */
490	xorl	%eax, %eax	/* zero the bits fnstsw does not write */
491	fnstsw	%ax		/* capture status before clearing */
492	fnclex
493	ret
494	SET_SIZE(fperr_reset)
495
	/*
	 * uint32_t fpxerr_reset(void)		(i386)
	 *
	 * Returns MXCSR in %eax as it was on entry, then reloads MXCSR with
	 * the SSE_MXCSR_EFLAGS bits cleared (presumably the exception-flag
	 * bits; the mask is defined outside this file).  A 4-byte stack
	 * temporary holds the value and is released before returning.
	 */
496	ENTRY_NP(fpxerr_reset)
497	CLTS
498	subl	$4, %esp		/* make some temporary space */
499	stmxcsr	(%esp)
500	movl	(%esp), %eax		/* return value: MXCSR before the clear */
501	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
502	ldmxcsr	(%esp)			/* clear processor exceptions */
503	addl	$4, %esp
504	ret
505	SET_SIZE(fpxerr_reset)
506
507#endif	/* __i386 */
508#endif	/* __lint */
509
510#if defined(__lint)
511
/*
 * lint-only C stub for fpgetcwsw(); the real routine is the assembly in
 * the #else branch of this __lint conditional.
 */
512uint32_t
513fpgetcwsw(void)
514{
515	return (0);
516}
517
518#else   /* __lint */
519
520#if defined(__amd64)
521
	/*
	 * uint32_t fpgetcwsw(void)		(amd64)
	 *
	 * Returns the x87 control and status words packed into %eax: the
	 * status word in bits 0-15 and the control word in bits 16-31 (the
	 * two 16-bit stores land at stack offsets 0 and 2 and are read back
	 * as one 32-bit load).  FPU state is not modified.
	 */
522	ENTRY_NP(fpgetcwsw)
523	pushq	%rbp
524	movq	%rsp, %rbp
525	subq	$0x10, %rsp		/* make some temporary space	*/
526	CLTS
527	fnstsw	(%rsp)			/* store the status word	*/
528	fnstcw	2(%rsp)			/* store the control word	*/
529	movl	(%rsp), %eax		/* put both in %eax		*/
530	leave
531	ret
532	SET_SIZE(fpgetcwsw)
533
534#elif defined(__i386)
535
	/*
	 * uint32_t fpgetcwsw(void)		(i386)
	 *
	 * Returns the x87 control and status words packed into %eax: the
	 * status word in bits 0-15 and the control word in bits 16-31 (the
	 * two 16-bit stores land at stack offsets 0 and 2 and are read back
	 * as one 32-bit load).  FPU state is not modified.
	 */
536	ENTRY_NP(fpgetcwsw)
537	CLTS
538	subl	$4, %esp		/* make some temporary space	*/
539	fnstsw	(%esp)			/* store the status word	*/
540	fnstcw	2(%esp)			/* store the control word	*/
541	movl	(%esp), %eax		/* put both in %eax		*/
542	addl	$4, %esp
543	ret
544	SET_SIZE(fpgetcwsw)
545
546#endif	/* __i386 */
547#endif  /* __lint */
548
549/*
550 * Returns the MXCSR register.
551 */
552
553#if defined(__lint)
554
/*
 * lint-only C stub for fpgetmxcsr(); the real routine is the assembly
 * in the #else branch of this __lint conditional.
 */
555uint32_t
556fpgetmxcsr(void)
557{
558	return (0);
559}
560
561#else   /* __lint */
562
563#if defined(__amd64)
564
	/*
	 * uint32_t fpgetmxcsr(void)		(amd64)
	 *
	 * Stores MXCSR to a stack temporary and returns it in %eax.  MXCSR
	 * itself is left unchanged.  Uses a standard %rbp frame with 16
	 * bytes of scratch, released by leave.
	 */
565	ENTRY_NP(fpgetmxcsr)
566	pushq	%rbp
567	movq	%rsp, %rbp
568	subq	$0x10, %rsp		/* make some temporary space */
569	CLTS
570	stmxcsr	(%rsp)
571	movl	(%rsp), %eax
572	leave
573	ret
574	SET_SIZE(fpgetmxcsr)
575
576#elif defined(__i386)
577
	/*
	 * uint32_t fpgetmxcsr(void)		(i386)
	 *
	 * Stores MXCSR to a 4-byte stack temporary and returns it in %eax.
	 * MXCSR itself is left unchanged.
	 */
578	ENTRY_NP(fpgetmxcsr)
579	CLTS
580	subl	$4, %esp		/* make some temporary space */
581	stmxcsr	(%esp)
582	movl	(%esp), %eax
583	addl	$4, %esp
584	ret
585	SET_SIZE(fpgetmxcsr)
586
587#endif	/* __i386 */
588#endif  /* __lint */
589