xref: /titanic_41/usr/src/uts/intel/ia32/ml/float.s (revision f998c95e3b7029fe5f7542e115f7474ddb8024d7)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
28/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
29/*        All Rights Reserved   */
30
31/*      Copyright (c) 1987, 1988 Microsoft Corporation  */
32/*        All Rights Reserved   */
33
34#pragma ident	"%Z%%M%	%I%	%E% SMI"
35
36#include <sys/asm_linkage.h>
37#include <sys/asm_misc.h>
38#include <sys/regset.h>
39#include <sys/privregs.h>
40#include <sys/x86_archext.h>
41
42#if defined(__lint)
43#include <sys/types.h>
44#include <sys/fp.h>
45#else
46#include "assym.h"
47#endif
48
49#if defined(__lint)
50
51uint_t
52fpu_initial_probe(void)
53{ return (0); }
54
55#else	/* __lint */
56
57	/*
58	 * Returns zero if x87 "chip" is present(!)
59	 */
60	ENTRY_NP(fpu_initial_probe)
61	CLTS
62	fninit
63	fnstsw	%ax
64	movzbl	%al, %eax
65	ret
66	SET_SIZE(fpu_initial_probe)
67
68#endif	/* __lint */
69
70#if defined(__lint)
71
/*
 * Lint-only stand-in for the assembly fxsave_insn(); does nothing.
 */
/*ARGSUSED*/
void
fxsave_insn(struct fxsave_state *fx)
{
}
76
77#else	/* __lint */
78
79#if defined(__amd64)
80
81	ENTRY_NP(fxsave_insn)
82	fxsave	(%rdi)
83	ret
84	SET_SIZE(fxsave_insn)
85
86#elif defined(__i386)
87
88	ENTRY_NP(fxsave_insn)
89	movl	4(%esp), %eax
90	fxsave	(%eax)
91	ret
92	SET_SIZE(fxsave_insn)
93
94#endif
95
96#endif	/* __lint */
97
98#if defined(__i386)
99
100/*
101 * If (num1/num2 > num1/num3) the FPU has the FDIV bug.
102 */
103
104#if defined(__lint)
105
/*
 * Lint-only stand-in for the assembly fpu_probe_pentium_fdivbug();
 * it always reports 0.
 */
int
fpu_probe_pentium_fdivbug(void)
{
	return (0);
}
109
110#else	/* __lint */
111
112	ENTRY_NP(fpu_probe_pentium_fdivbug)
113	fldl	.num1
114	fldl	.num2
115	fdivr	%st(1), %st
116	fxch	%st(1)
117	fdivl	.num3
118	fcompp
119	fstsw	%ax
120	sahf
121	jae	0f
122	movl	$1, %eax
123	ret
124
1250:	xorl	%eax, %eax
126	ret
127
128	.align	4
129.num1:	.4byte	0xbce4217d	/* 4.999999 */
130	.4byte	0x4013ffff
131.num2:	.4byte	0x0		/* 15.0 */
132	.4byte	0x402e0000
133.num3:	.4byte	0xde7210bf	/* 14.999999 */
134	.4byte	0x402dffff
135	SET_SIZE(fpu_probe_pentium_fdivbug)
136
137#endif	/* __lint */
138
139/*
140 * To cope with processors that do not implement fxsave/fxrstor
141 * instructions, patch hot paths in the kernel to use them only
142 * when that feature has been detected.
143 */
144
145#if defined(__lint)
146
/*
 * Lint-only stand-in for the assembly patch_sse(); does nothing.
 */
void
patch_sse(void)
{
}
150
/*
 * Lint-only stand-in for the assembly patch_sse2(); does nothing.
 */
void
patch_sse2(void)
{
}
154
155#else	/* __lint */
156
157	ENTRY_NP(patch_sse)
158	_HOT_PATCH_PROLOG
159	/
160	/	frstor (%ebx); nop	-> fxrstor (%ebx)
161	/
162	_HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
163	/
164	/	lock; xorl $0, (%esp)	-> sfence; ret
165	/
166	_HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
167	_HOT_PATCH_EPILOG
168	ret
169_fxrstor_ebx_insn:			/ see ndptrap_frstor()
170	fxrstor	(%ebx)
171_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
172	ldmxcsr	(%ebx)
173_sfence_ret_insn:			/ see membar_producer()
174	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
175	ret
176	SET_SIZE(patch_sse)
177
178	ENTRY_NP(patch_sse2)
179	_HOT_PATCH_PROLOG
180	/
181	/	lock; xorl $0, (%esp)	-> lfence; ret
182	/
183	_HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
184	_HOT_PATCH_EPILOG
185	ret
186_lfence_ret_insn:			/ see membar_consumer()
187	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
188	ret
189	SET_SIZE(patch_sse2)
190
191#endif	/* __lint */
192#endif	/* __i386 */
193
194
195/*
196 * One of these routines is called from any lwp with floating
197 * point context as part of the prolog of a context switch.
198 */
199
200#if defined(__lint)
201
/*
 * Lint-only stand-in for the assembly fpxsave_ctxt(); does nothing.
 */
/*ARGSUSED*/
void
fpxsave_ctxt(void *arg)
{
}
206
/*
 * Lint-only stand-in for the assembly fpnsave_ctxt(); does nothing.
 */
/*ARGSUSED*/
void
fpnsave_ctxt(void *arg)
{
}
211
212#else	/* __lint */
213
214#if defined(__amd64)
215
216	ENTRY_NP(fpxsave_ctxt)
217	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
218	jne	1f
219
220	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
221	fxsave	FPU_CTX_FPU_REGS(%rdi)
222	/*
223	 * On certain AMD processors, the "exception pointers" i.e. the last
224	 * instruction pointer, last data pointer, and last opcode
225	 * are saved by the fxsave instruction ONLY if the exception summary
226	 * bit is set.
227	 *
228	 * To ensure that we don't leak these values into the next context
229	 * on the cpu, we could just issue an fninit here, but that's
230	 * rather slow and so we issue an instruction sequence that
231	 * clears them more quickly, if a little obscurely.
232	 */
233	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
234	jnc	0f				/* jump if ES = 0 */
235	fnclex		/* clear pending x87 exceptions */
2360:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
237	fildl	.fpzero_const(%rip)
238			/* dummy load changes all exception pointers */
239	STTS(%rsi)	/* trap on next fpu touch */
2401:	rep;	ret	/* use 2 byte return instruction when branch target */
241			/* AMD Software Optimization Guide - Section 6.2 */
242	SET_SIZE(fpxsave_ctxt)
243
244#elif defined(__i386)
245
246	ENTRY_NP(fpnsave_ctxt)
247	movl	4(%esp), %eax		/* a struct fpu_ctx */
248	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
249	jne	1f
250
251	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
252	fnsave	FPU_CTX_FPU_REGS(%eax)
253			/* (fnsave also reinitializes x87 state) */
254	STTS(%edx)	/* trap on next fpu touch */
2551:	rep;	ret	/* use 2 byte return instruction when branch target */
256			/* AMD Software Optimization Guide - Section 6.2 */
257	SET_SIZE(fpnsave_ctxt)
258
259	ENTRY_NP(fpxsave_ctxt)
260	movl	4(%esp), %eax		/* a struct fpu_ctx */
261	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
262	jne	1f
263
264	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
265	fxsave	FPU_CTX_FPU_REGS(%eax)
266			/* (see notes above about "exception pointers") */
267	btw	$7, FXSAVE_STATE_FSW(%eax)	/* Test saved ES bit */
268	jnc	0f				/* jump if ES = 0 */
269	fnclex		/* clear pending x87 exceptions */
2700:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
271	fildl	.fpzero_const
272			/* dummy load changes all exception pointers */
273	STTS(%edx)	/* trap on next fpu touch */
2741:	rep;	ret	/* use 2 byte return instruction when branch target */
275			/* AMD Software Optimization Guide - Section 6.2 */
276	SET_SIZE(fpxsave_ctxt)
277
278#endif	/* __i386 */
279
280	.align	8
281.fpzero_const:
282	.4byte	0x0
283	.4byte	0x0
284
285#endif	/* __lint */
286
287
288#if defined(__lint)
289
/*
 * Lint-only stand-in for the assembly fpsave(); does nothing.
 */
/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{
}
294
/*
 * Lint-only stand-in for the assembly fpxsave(); does nothing.
 */
/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{
}
299
300#else	/* __lint */
301
302#if defined(__amd64)
303
304	ENTRY_NP(fpxsave)
305	CLTS
306	fxsave	(%rdi)
307	fninit				/* clear exceptions, init x87 tags */
308	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
309	ret
310	SET_SIZE(fpxsave)
311
312#elif defined(__i386)
313
314	ENTRY_NP(fpsave)
315	CLTS
316	movl	4(%esp), %eax
317	fnsave	(%eax)
318	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
319	ret
320	SET_SIZE(fpsave)
321
322	ENTRY_NP(fpxsave)
323	CLTS
324	movl	4(%esp), %eax
325	fxsave	(%eax)
326	fninit				/* clear exceptions, init x87 tags */
327	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
328	ret
329	SET_SIZE(fpxsave)
330
331#endif	/* __i386 */
332#endif	/* __lint */
333
334#if defined(__lint)
335
/*
 * Lint-only stand-in for the assembly fprestore(); does nothing.
 */
/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{
}
340
/*
 * Lint-only stand-in for the assembly fpxrestore(); does nothing.
 */
/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{
}
345
346#else	/* __lint */
347
348#if defined(__amd64)
349
350	ENTRY_NP(fpxrestore)
351	CLTS
352	fxrstor	(%rdi)
353	ret
354	SET_SIZE(fpxrestore)
355
356#elif defined(__i386)
357
358	ENTRY_NP(fprestore)
359	CLTS
360	movl	4(%esp), %eax
361	frstor	(%eax)
362	ret
363	SET_SIZE(fprestore)
364
365	ENTRY_NP(fpxrestore)
366	CLTS
367	movl	4(%esp), %eax
368	fxrstor	(%eax)
369	ret
370	SET_SIZE(fpxrestore)
371
372#endif	/* __i386 */
373#endif	/* __lint */
374
375/*
376 * Disable the floating point unit.
377 */
378
379#if defined(__lint)
380
/*
 * Lint-only stand-in for the assembly fpdisable(); does nothing.
 */
void
fpdisable(void)
{
}
384
385#else	/* __lint */
386
387#if defined(__amd64)
388
389	ENTRY_NP(fpdisable)
390	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
391	ret
392	SET_SIZE(fpdisable)
393
394#elif defined(__i386)
395
396	ENTRY_NP(fpdisable)
397	STTS(%eax)
398	ret
399	SET_SIZE(fpdisable)
400
401#endif	/* __i386 */
402#endif	/* __lint */
403
404/*
405 * Initialize the fpu hardware.
406 */
407
408#if defined(__lint)
409
/*
 * Lint-only stand-in for the assembly fpinit(); does nothing.
 */
void
fpinit(void)
{
}
413
414#else	/* __lint */
415
416#if defined(__amd64)
417
418	ENTRY_NP(fpinit)
419	CLTS
420	leaq	sse_initial(%rip), %rax
421	fxrstor	(%rax)			/* load clean initial state */
422	ret
423	SET_SIZE(fpinit)
424
425#elif defined(__i386)
426
427	ENTRY_NP(fpinit)
428	CLTS
429	cmpl	$__FP_SSE, fp_kind
430	je	1f
431
432	fninit
433	movl	$x87_initial, %eax
434	frstor	(%eax)			/* load clean initial state */
435	ret
4361:
437	movl	$sse_initial, %eax
438	fxrstor	(%eax)			/* load clean initial state */
439	ret
440	SET_SIZE(fpinit)
441
442#endif	/* __i386 */
443#endif	/* __lint */
444
445/*
446 * Clears FPU exception state.
447 * Returns the FP status word.
448 */
449
450#if defined(__lint)
451
/*
 * Lint-only stand-in for the assembly fperr_reset(); it always
 * reports 0.
 */
uint32_t
fperr_reset(void)
{
	return (0);
}
455
/*
 * Lint-only stand-in for the assembly fpxerr_reset(); it always
 * reports 0.
 */
uint32_t
fpxerr_reset(void)
{
	return (0);
}
459
460#else	/* __lint */
461
462#if defined(__amd64)
463
464	ENTRY_NP(fperr_reset)
465	CLTS
466	xorl	%eax, %eax
467	fnstsw	%ax
468	fnclex
469	ret
470	SET_SIZE(fperr_reset)
471
472	ENTRY_NP(fpxerr_reset)
473	pushq	%rbp
474	movq	%rsp, %rbp
475	subq	$0x10, %rsp		/* make some temporary space */
476	CLTS
477	stmxcsr	(%rsp)
478	movl	(%rsp), %eax
479	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
480	ldmxcsr	(%rsp)			/* clear processor exceptions */
481	leave
482	ret
483	SET_SIZE(fpxerr_reset)
484
485#elif defined(__i386)
486
487	ENTRY_NP(fperr_reset)
488	CLTS
489	xorl	%eax, %eax
490	fnstsw	%ax
491	fnclex
492	ret
493	SET_SIZE(fperr_reset)
494
495	ENTRY_NP(fpxerr_reset)
496	CLTS
497	subl	$4, %esp		/* make some temporary space */
498	stmxcsr	(%esp)
499	movl	(%esp), %eax
500	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
501	ldmxcsr	(%esp)			/* clear processor exceptions */
502	addl	$4, %esp
503	ret
504	SET_SIZE(fpxerr_reset)
505
506#endif	/* __i386 */
507#endif	/* __lint */
508
509#if defined(__lint)
510
511uint32_t
512fpgetcwsw(void)
513{
514	return (0);
515}
516
517#else   /* __lint */
518
519#if defined(__amd64)
520
521	ENTRY_NP(fpgetcwsw)
522	pushq	%rbp
523	movq	%rsp, %rbp
524	subq	$0x10, %rsp		/* make some temporary space	*/
525	CLTS
526	fnstsw	(%rsp)			/* store the status word	*/
527	fnstcw	2(%rsp)			/* store the control word	*/
528	movl	(%rsp), %eax		/* put both in %eax		*/
529	leave
530	ret
531	SET_SIZE(fpgetcwsw)
532
533#elif defined(__i386)
534
535	ENTRY_NP(fpgetcwsw)
536	CLTS
537	subl	$4, %esp		/* make some temporary space	*/
538	fnstsw	(%esp)			/* store the status word	*/
539	fnstcw	2(%esp)			/* store the control word	*/
540	movl	(%esp), %eax		/* put both in %eax		*/
541	addl	$4, %esp
542	ret
543	SET_SIZE(fpgetcwsw)
544
545#endif	/* __i386 */
546#endif  /* __lint */
547
548/*
549 * Returns the MXCSR register.
550 */
551
552#if defined(__lint)
553
/*
 * Lint-only stand-in for the assembly fpgetmxcsr(); it always
 * reports 0.
 */
uint32_t
fpgetmxcsr(void)
{ return (0); }
559
560#else   /* __lint */
561
562#if defined(__amd64)
563
564	ENTRY_NP(fpgetmxcsr)
565	pushq	%rbp
566	movq	%rsp, %rbp
567	subq	$0x10, %rsp		/* make some temporary space */
568	CLTS
569	stmxcsr	(%rsp)
570	movl	(%rsp), %eax
571	leave
572	ret
573	SET_SIZE(fpgetmxcsr)
574
575#elif defined(__i386)
576
577	ENTRY_NP(fpgetmxcsr)
578	CLTS
579	subl	$4, %esp		/* make some temporary space */
580	stmxcsr	(%esp)
581	movl	(%esp), %eax
582	addl	$4, %esp
583	ret
584	SET_SIZE(fpgetmxcsr)
585
586#endif	/* __i386 */
587#endif  /* __lint */
588