xref: /titanic_51/usr/src/uts/intel/ia32/ml/float.s (revision fa9e4066f08beec538e775443c5be79dd423fcab)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
28/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
29/*        All Rights Reserved   */
30
31/*      Copyright (c) 1987, 1988 Microsoft Corporation  */
32/*        All Rights Reserved   */
33
34#pragma ident	"%Z%%M%	%I%	%E% SMI"
35
36#include <sys/asm_linkage.h>
37#include <sys/regset.h>
38#include <sys/privregs.h>
39#include <sys/x86_archext.h>
40
41#if defined(__lint)
42#include <sys/types.h>
43#include <sys/fp.h>
44#else
45#include "assym.h"
46#endif
47
48#if defined(__lint)
49
50int fpu_exists = 1;
51int fp_kind = FP_387;
52int fpu_ignored = 0;
53
54int use_sse_pagecopy = 0;
55int use_sse_pagezero = 0;
56int use_sse_copy = 0;
57
58#if defined(__i386)
59
60int fpu_pentium_fdivbug = 0;
61
62#endif
63
64#else	/* __lint */
65
66	/*
67	 * If fpu_exists is non-zero, fpu_probe will attempt to use any
68	 * hardware FPU (subject to other constraints, see below).  If
69	 * fpu_exists is zero, fpu_probe will report that there is no
70	 * FPU even if there is one.
71	 */
72	DGDEF3(fpu_exists, 4, 4)
73	.long	1
74
75	DGDEF3(fp_kind, 4, 4)
76	.long	FP_387		/* FP_NO, FP_287, FP_387, etc. */
77
78	/*
79	 * The variable fpu_ignored is provided to allow other code to
80	 * determine whether emulation is being done because there is
81	 * no FPU or because of an override requested via /etc/system.
82	 */
83	DGDEF3(fpu_ignored, 4, 4)
84	.long	0
85
86	/*
87	 * Used by ppcopy, ppzero, and xcopyin to determine whether or not
88	 * to use the SSE-based routines
89	 */
90	DGDEF3(use_sse_pagecopy, 4, 4)
91	.long	0
92
93	DGDEF3(use_sse_pagezero, 4, 4)
94	.long	0
95
96	DGDEF3(use_sse_copy, 4, 4)
97	.long	0
98
99#if defined(__i386)
100
101	/*
102	 * The variable fpu_pentium_fdivbug is provided to allow other code to
103	 * determine whether the system contains a Pentium with the FDIV
104	 * problem.
105	 */
106	DGDEF3(fpu_pentium_fdivbug, 4, 4)
107	.long	0
108
109	/*
110	 * The following constants are used for detecting the Pentium
111	 * divide bug.
112	 */
113	.align	4
114num1:	.4byte	0xbce4217d	/* 4.999999 */
115	.4byte	0x4013ffff
116num2:	.4byte	0x0		/* 15.0 */
117	.4byte	0x402e0000
118num3:	.4byte	0xde7210bf	/* 14.999999 */
119	.4byte	0x402dffff
120
121#endif	/* __i386 */
122#endif	/* __lint */
123
124/*
125 * FPU probe - check if we have any FP chip present by trying to do a reset.
126 * If that succeeds, differentiate via cr0. Called from autoconf.
127 */
128
129#if defined(__lint)
130
/*
 * Lint stub for fpu_probe(); the real routine is the assembler
 * version in this file.
 */
/*ARGSUSED*/
void
fpu_probe(void)
{
}
135
136#else	/* __lint */
137
138#if defined(__amd64)
139
140	ENTRY_NP(fpu_probe)
141	pushq	%rbp
142	movq	%rsp, %rbp
143	clts				/* clear task switched bit in CR0 */
144	fninit				/* initialize chip */
145	fnstsw	%ax			/* get status */
146	orb	%al, %al		/* status zero? 0 = chip present */
147	jnz	no_fpu_hw
148
149	/*
150	 * Ignore the FPU if fp_exists == 0
151	 */
152	cmpl	$0, fpu_exists(%rip)
153	je	ignore_fpu
154
155	/*
156	 * we have a chip of some sort; use cr0 to differentiate
157	 */
158	movq	%cr0, %rdx		/* check for fpu present flag */
159	testl	$CR0_ET, %edx
160	jz	no_fpu_hw		/* z -> fpu not present */
161	testl	$X86_SSE, x86_feature(%rip)
162	je	no_fpu_hw		/* SSE is utterly required */
163	testl	$X86_SSE2, x86_feature(%rip)
164	je	no_fpu_hw		/* SSE2 too .. */
165	movl	$__FP_SSE, fp_kind(%rip)
166
167	/*
168	 * Tell the processor what we're doing via %cr4
169	 */
170	movq	%cr4, %rax
171	orq	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %rax
172	movq	%rax, %cr4
173
174	/*
175	 * make other CPUs share the same cr4 settings
176	 */
177	orq	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value(%rip)
178
179	/*
180	 * extract the MXCSR_MASK field from our first fxsave
181	 */
182	subq	$FXSAVE_STATE_SIZE, %rsp
183	movl	$0, FXSAVE_STATE_MXCSR_MASK(%rsp)
184	fxsave	(%rsp)
185	movl	FXSAVE_STATE_MXCSR_MASK(%rsp), %eax
186	cmpl	$0, %eax
187	je	1f			/* default mask value set in fpu.c */
188	movl	%eax, sse_mxcsr_mask(%rip) /* override mask set here */
1891:
190	movq	%cr0, %rax
191	andq	$_BITNOT(CR0_TS|CR0_EM), %rdx	/* clear emulate math bit */
192	orq	$_CONST(CR0_MP|CR0_NE), %rdx
193
194	/*
195	 * We have SSE and SSE2 so enable the extensions for
196	 * non-temporal copies and stores.
197	 */
198	movl	$1, use_sse_pagecopy
199	movl	$1, use_sse_pagezero
200	movl	$1, use_sse_copy
201
202	jmp	done
203
204	/*
205	 * Do not use the FPU at all
206	 */
207ignore_fpu:
208	movl	$1, fpu_ignored(%rip)
209
210	/*
211	 * No FPU hardware present
212	 */
213no_fpu_hw:
214	andq	$_BITNOT(CR0_MP), %rdx	/* clear math chip present */
215	orq	$CR0_EM, %rdx		/* set emulate math bit */
216	movl	$FP_NO, fp_kind(%rip)	/* signify that there is no FPU */
217	movl	$0, fpu_exists(%rip)	/* no FPU present */
218	/*
219	 * Disable the XMM-related gorp too, in case the BIOS set them
220	 */
221	movq	%cr4, %rax
222	andq	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), %rax
223	movq	%rax, %cr4
224	andq	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value(%rip)
225
226done:
227	movq	%rdx, %cr0		/* set machine status word */
228	leave
229	ret
230	SET_SIZE(fpu_probe)
231
232#elif defined(__i386)
233
234	ENTRY_NP(fpu_probe)
235	clts				/ clear task switched bit in CR0
236	fninit				/ initialize chip
237	fnstsw	%ax			/ get status
238	orb	%al, %al		/ status zero? 0 = chip present
239	jnz	no_fpu_hw		/ no, use emulator
240/
241/ If there is an FP, look for the Pentium FDIV problem even if we
242/ do not plan to use it.  Set fpu_pentium_fdivbug is a bad FPU is
243/ detected.  Subsequent code can report the result if desired.
244/
245/ If (num1/num2 > num1/num3) the FPU has the FDIV bug.
246/
247	fldl	num1
248	fldl	num2
249	fdivr	%st(1), %st
250	fxch	%st(1)
251	fdivl	num3
252	fcompp
253	fstsw	%ax
254	sahf
255	jae	no_bug
256	movl	$1, fpu_pentium_fdivbug
257no_bug:
258/
259/ Repeat the earlier initialization sequence so that the FPU is left in
260/ the expected state.
261/
262	fninit
263	fnstsw	%ax
264/
265/ Ignore the FPU if fpu_exists == 0
266/
267	cmpl	$0, fpu_exists
268	je	ignore_fpu
269/
270/ Ignore the FPU if it has the Pentium bug
271/
272	cmpl	$0, fpu_pentium_fdivbug
273	jne	ignore_fpu
274/
275/ at this point we know we have a chip of some sort;
276/ use cr0 to differentiate.
277/
278	movl    %cr0, %edx		/ check for 387 present flag
279	testl	$CR0_ET, %edx		/ ...
280	jz	is287			/ z -> 387 not present
281	movl	$FP_387, fp_kind	/ we have a 387 or later chip
282/
283/ clear the "XMM supported" bits in %cr4 in case the BIOS set them
284/ erroneously -- see 4965674
285/
286	movl	%cr4, %eax
287	andl	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
288	movl	%eax, %cr4
289	andl	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value
290
291	testl	$X86_SSE, x86_feature	/ can we do SSE?
292	je	mathchip
293/
294/ aha .. we have an SSE-capable chip
295/
296/ - set fpsave_begin to fpxsave_begin
297/ - hot patch performance critical code to use fxsave/fxrstor directly,
298/   and hot patch membar_producer() to use sfence instead of lock
299/ - tell the processor what we're doing via %cr4
300/ - allow fully fledged #XM exceptions to be generated by SSE/SSE2
301/   (the default mask set in fpinit() disables them)
302/ - determine the mxcsr_mask so we can avoid setting reserved bits
303/
304	movl	$__FP_SSE, fp_kind
305	movl	$fpxsave_begin, %eax
306	movl	%eax, fpsave_begin
307	call	patch_sse
308	mov	%cr4, %eax
309	orl	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
310	mov	%eax, %cr4
311/
312/ make other CPUs share the same cr4 settings
313/
314	orl	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value
315/
316/ extract the MXCSR_MASK field from our first fxsave
317/
318	subl	$FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
319	movl	%esp, %eax
320	addl	$XMM_ALIGN, %eax
321	andl	$_BITNOT(XMM_ALIGN-1), %eax	/* 16-byte alignment */
322	movl	$0, FXSAVE_STATE_MXCSR_MASK(%eax)
323	fxsave	(%eax)
324	movl	FXSAVE_STATE_MXCSR_MASK(%eax), %eax
325	addl	$FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
326	cmpl	$0, %eax
327	je	1f			/ default mask value set in fpu.c
328	movl	%eax, sse_mxcsr_mask	/ override mask set here
3291:	testl	$X86_SSE2, x86_feature	/ can we do SSE2?
330	je	mathchip
331/
332/ aha .. we have an SSE2-capable chip
333/
334/ - enable pagezero and pagecopy using non-temporal instructions
335/ - hot patch membar_consumer() to use lfence instead of lock
336/
337	movl	$1, use_sse_pagecopy	/ will now call hwblkpagecopy
338	movl	$1, use_sse_pagezero	/ will now call hwblkclr
339	movl	$1, use_sse_copy
340	call	patch_sse2
341	jmp	mathchip
342/
343/ No 387; we must have an 80287.
344/
345is287:
346#if !defined(__GNUC_AS__)
347	fsetpm				/ set the 80287 into protected mode
348	movl	$FP_287, fp_kind	/ we have a 287 chip
349#else
350	movl	$FP_NO, fp_kind		/ maybe just explode here instead?
351#endif
352/
353/ We have either a 287, 387, 486 or P5.
354/ Setup cr0 to reflect the FPU hw type.
355/
356mathchip:
357	movl	%cr0, %edx
358	andl	$_BITNOT(CR0_TS|CR0_EM), %edx	/* clear emulate math bit */
359	orl	$_CONST(CR0_MP|CR0_NE), %edx
360	jmp	cont
361
362/ Do not use the FPU
363ignore_fpu:
364	movl	$1, fpu_ignored
365/ No FP hw present.
366no_fpu_hw:
367	movl	%cr0, %edx
368	andl	$_BITNOT(CR0_MP), %edx	/* clear math chip present */
369	movl	$FP_NO, fp_kind		/ signify that there is no FPU
370	movl	$0, fpu_exists		/ no FPU present
371cont:
372	movl	%edx, %cr0		/ set machine status word
373	ret
374	SET_SIZE(fpu_probe)
375
/*
 * HOT_PATCH(srcaddr, dstaddr, size)
 *
 * Walks size bytes starting at srcaddr and, for each one, calls
 * hot_patch_kernel_text(destination, byte, 1), advancing the destination
 * from dstaddr in step.  %esi is the source cursor, %edi the destination
 * cursor and %ebx the number of bytes left; the expansion does not
 * preserve them, so the enclosing routine has to.  The count is tested
 * only after a byte has been patched, so size must be non-zero.
 */
#define	HOT_PATCH(srcaddr, dstaddr, size)	\
	movl	$srcaddr, %esi;			\
	movl	$dstaddr, %edi;			\
	movl	$size, %ebx;			\
0:	pushl	$1;				\
	movzbl	(%esi), %eax;			\
	pushl	%eax;				\
	pushl	%edi;				\
	call	hot_patch_kernel_text;		\
	addl	$12, %esp;			\
	incl	%edi;				\
	incl	%esi;				\
	decl	%ebx;				\
	testl	%ebx, %ebx;			\
	jne	0b
391
392	/*
393	 * To cope with processors that do not implement fxsave/fxrstor
394	 * instructions, patch hot paths in the kernel to use them only
395	 * when that feature has been detected.
396	 */
397	ENTRY_NP(patch_sse)
398	push	%ebp
399	mov	%esp, %ebp
400	push	%ebx
401	push	%esi
402	push	%edi
403	/
404	/	frstor (%eax); nop	-> fxrstor (%eax)
405	/
406	HOT_PATCH(_fxrstor_eax_insn, _patch_fxrstor_eax, 3)
407	/
408	/	nop; nop; nop		-> ldmxcsr (%ebx)
409	/
410	HOT_PATCH(_ldmxcsr_ebx_insn, _patch_ldmxcsr_ebx, 3)
411	/
412	/	lock; xorl $0, (%esp)	-> sfence; ret
413	/
414	HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
415	pop	%edi
416	pop	%esi
417	pop	%ebx
418	mov	%ebp, %esp
419	pop	%ebp
420	ret
421_fxrstor_eax_insn:			/ see ndptrap_frstor()
422	fxrstor	(%eax)
423_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
424	ldmxcsr	(%ebx)
425_sfence_ret_insn:			/ see membar_producer()
426	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
427	ret
428	SET_SIZE(patch_sse)
429
430	/*
431	 * Ditto, but this time for functions that depend upon SSE2 extensions
432	 */
433	ENTRY_NP(patch_sse2)
434	push	%ebp
435	mov	%esp, %ebp
436	push	%ebx
437	push	%esi
438	push	%edi
439	/
440	/	lock; xorl $0, (%esp)	-> lfence; ret
441	/
442	HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
443	pop	%edi
444	pop	%esi
445	pop	%ebx
446	mov	%ebp, %esp
447	pop	%ebp
448	ret
449_lfence_ret_insn:			/ see membar_consumer()
450	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
451	ret
452	SET_SIZE(patch_sse2)
453
454#endif	/* __i386 */
455#endif	/* __lint */
456
457
/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch; the
 * routine starts the floating point state save operation.
 * The completion of the save is forced by an fwait just before
 * we truly switch contexts.
 */
465
466#if defined(__lint)
467
/*
 * Lint stub for fpnsave_begin(); the real routine is the assembler
 * version in this file.
 */
/*ARGSUSED*/
void
fpnsave_begin(void *arg)
{
}
472
/*
 * Lint stub for fpxsave_begin(); the real routine is the assembler
 * version in this file.
 */
/*ARGSUSED*/
void
fpxsave_begin(void *arg)
{
}
477
478#else	/* __lint */
479
480#if defined(__amd64)
481
482	ENTRY_NP(fpxsave_begin)
483	movl	FPU_CTX_FPU_FLAGS(%rdi), %edx
484	cmpl	$FPU_EN, %edx
485	jne	1f
486#if FPU_CTX_FPU_REGS != 0
487	addq	FPU_CTX_FPU_REGS, %rdi
488#endif
489	fxsave	(%rdi)
490	fnclex				/* clear pending x87 exceptions */
4911:	rep;	ret	/* use 2 byte return instruction when branch target */
492			/* AMD Software Optimization Guide - Section 6.2 */
493	SET_SIZE(fpxsave_begin)
494
495#elif defined(__i386)
496
497	ENTRY_NP(fpnsave_begin)
498	mov	4(%esp), %eax		/ a struct fpu_ctx *
499	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
500	cmpl	$FPU_EN, %edx
501	jne	1f
502#if FPU_CTX_FPU_REGS != 0
503	addl	FPU_CTX_FPU_REGS, %eax
504#endif
505	fnsave	(%eax)
5061:	rep;	ret	/* use 2 byte return instruction when branch target */
507			/* AMD Software Optimization Guide - Section 6.2 */
508	SET_SIZE(fpnsave_begin)
509
510	ENTRY_NP(fpxsave_begin)
511	mov	4(%esp), %eax		/ a struct fpu_ctx *
512	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
513	cmpl	$FPU_EN, %edx
514	jne	1f
515#if FPU_CTX_FPU_REGS != 0
516	addl	FPU_CTX_FPU_REGS, %eax
517#endif
518	fxsave	(%eax)
519	fnclex				/ Clear pending x87 exceptions
5201:	rep;	ret	/* use 2 byte return instruction when branch target */
521			/* AMD Software Optimization Guide - Section 6.2 */
522	SET_SIZE(fpxsave_begin)
523
524#endif	/* __i386 */
525#endif	/* __lint */
526
527#if defined(__lint)
528
/*
 * Lint stub for fpsave(); the real routine is the assembler version
 * in this file.
 */
/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{
}
533
/*
 * Lint stub for fpxsave(); the real routine is the assembler version
 * in this file.
 */
/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{
}
538
539#else	/* __lint */
540
541#if defined(__amd64)
542
543	ENTRY_NP(fpxsave)
544	clts				/* clear TS bit in CR0 */
545	fxsave	(%rdi)
546	fnclex				/* clear pending x87 exceptions */
547	fwait				/* wait for completion */
548	fninit				/* emulate fnsave: init x87 tags */
549	movq	%cr0, %rax
550	orq	$CR0_TS, %rax
551	movq	%rax, %cr0		/* set TS bit in CR0 (disable FPU) */
552	ret
553	SET_SIZE(fpxsave)
554
555#elif defined(__i386)
556
557	ENTRY_NP(fpsave)
558	clts				/ clear TS bit in CR0
559	movl	4(%esp), %eax		/ load save address
560	fnsave	(%eax)
561	fwait				/ wait for completion
562	movl	%cr0, %eax
563	orl	$CR0_TS, %eax
564	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
565	ret
566	SET_SIZE(fpsave)
567
568	ENTRY_NP(fpxsave)
569	clts				/ clear TS bit in CR0
570	movl	4(%esp), %eax		/ save address
571	fxsave	(%eax)
572	fnclex				/ Clear pending x87 exceptions
573	fwait				/ wait for completion
574	fninit				/ emulate fnsave: init x87 tag words
575	mov	%cr0, %eax
576	orl	$CR0_TS, %eax
577	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
578	ret
579	SET_SIZE(fpxsave)
580
581#endif	/* __i386 */
582#endif	/* __lint */
583
584#if defined(__lint)
585
/*
 * Lint stub for fprestore(); the real routine is the assembler
 * version in this file.
 */
/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{
}
590
/*
 * Lint stub for fpxrestore(); the real routine is the assembler
 * version in this file.
 */
/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{
}
595
596#else	/* __lint */
597
598#if defined(__amd64)
599
600	ENTRY_NP(fpxrestore)
601	clts				/* clear TS bit in CR0 */
602	fxrstor	(%rdi)
603	ret
604	SET_SIZE(fpxrestore)
605
606#elif defined(__i386)
607
608	ENTRY_NP(fprestore)
609	clts				/ clear TS bit in CR0
610	movl	4(%esp), %eax		/ load restore address
611	frstor	(%eax)
612	ret
613	SET_SIZE(fprestore)
614
615	ENTRY_NP(fpxrestore)
616	clts				/ clear TS bit in CR0
617	movl	4(%esp), %eax		/ load restore address
618	fxrstor	(%eax)
619	ret
620	SET_SIZE(fpxrestore)
621
622#endif	/* __i386 */
623#endif	/* __lint */
624
625/*
626 * Disable the floating point unit.
627 */
628
629#if defined(__lint)
630
/*
 * Lint stub for fpdisable(); the real routine is the assembler
 * version in this file.
 */
void
fpdisable(void)
{
}
634
635#else	/* __lint */
636
637#if defined(__amd64)
638
639	ENTRY_NP(fpdisable)
640	movq	%cr0, %rax
641	orq	$CR0_TS, %rax
642	movq	%rax, %cr0		/* set TS bit in CR0 (disable FPU) */
643	ret
644	SET_SIZE(fpdisable)
645
646#elif defined(__i386)
647
648	ENTRY_NP(fpdisable)
649	movl	%cr0, %eax
650	orl	$CR0_TS, %eax
651	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
652	ret
653	SET_SIZE(fpdisable)
654
655#endif	/* __i386 */
656#endif	/* __lint */
657
658/*
659 * Initialize the fpu hardware.
660 */
661
662#if defined(__lint)
663
/*
 * Lint stub for fpinit(); the real routine is the assembler version
 * in this file.
 */
void
fpinit(void)
{
}
667
668#else	/* __lint */
669
670#if defined(__amd64)
671
672	ENTRY_NP(fpinit)
673	clts				/* clear TS bit in CR0 */
674	leaq	sse_initial(%rip), %rax
675	fxrstor	(%rax)			/* load clean initial state */
676	ret
677	SET_SIZE(fpinit)
678
679#elif defined(__i386)
680
681	ENTRY_NP(fpinit)
682	clts				/ clear TS bit in CR0
683	cmpl	$__FP_SSE, fp_kind
684	je	1f
685
686	fninit				/ initialize the chip
687	movl	$x87_initial, %eax
688	frstor	(%eax)			/ load clean initial state
689	ret
6901:
691	movl	$sse_initial, %eax
692	fxrstor	(%eax)			/ load clean initial state
693	ret
694	SET_SIZE(fpinit)
695
696#endif	/* __i386 */
697#endif	/* __lint */
698
699/*
700 * Clears FPU exception state.
701 * Returns the FP status word.
702 */
703
704#if defined(__lint)
705
706uint32_t
707fperr_reset(void)
708{
709	return (0);
710}
711
712uint32_t
713fpxerr_reset(void)
714{
715	return (0);
716}
717
718#else	/* __lint */
719
720#if defined(__amd64)
721
722	ENTRY_NP(fperr_reset)
723	xorl	%eax, %eax
724	clts				/* clear TS bit in CR0 */
725	fnstsw	%ax			/* get status */
726	fnclex				/* clear processor exceptions */
727	ret
728	SET_SIZE(fperr_reset)
729
730	ENTRY_NP(fpxerr_reset)
731	pushq	%rbp
732	movq	%rsp, %rbp
733	subq	$0x10, %rsp		/* make some temporary space */
734	clts				/* clear TS bit in CR0 */
735	stmxcsr	(%rsp)			/* get status */
736	movl	(%rsp), %eax
737	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
738	ldmxcsr	(%rsp)			/* clear processor exceptions */
739	leave
740	ret
741	SET_SIZE(fpxerr_reset)
742
743#elif defined(__i386)
744
745	ENTRY_NP(fperr_reset)
746	xorl	%eax, %eax
747	clts				/ clear TS bit in CR0
748	fnstsw	%ax			/ get status
749	fnclex				/ clear processor exceptions
750	ret
751	SET_SIZE(fperr_reset)
752
753	ENTRY_NP(fpxerr_reset)
754	clts				/ clear TS bit in CR0
755	subl	$4, %esp		/ make some temporary space
756	stmxcsr	(%esp)			/ get status
757	movl	(%esp), %eax
758	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
759	ldmxcsr	(%esp)			/ clear processor exceptions
760	addl	$4, %esp
761	ret
762	SET_SIZE(fpxerr_reset)
763
764#endif	/* __i386 */
765#endif	/* __lint */
766
767#if defined(__lint)
768
769uint32_t
770fpgetcwsw(void)
771{
772	return (0);
773}
774
775#else   /* __lint */
776
777#if defined(__amd64)
778
779	ENTRY_NP(fpgetcwsw)
780	pushq	%rbp
781	movq	%rsp, %rbp
782	subq	$0x10, %rsp		/* make some temporary space	*/
783	clts				/* clear TS bit in CR0		*/
784	fnstsw	(%rsp)			/* store the status word	*/
785	fnstcw	2(%rsp)			/* store the control word	*/
786	movl	(%rsp), %eax		/* put both in %eax		*/
787	leave
788	ret
789	SET_SIZE(fpgetcwsw)
790
791#elif defined(__i386)
792
793	ENTRY_NP(fpgetcwsw)
794	clts				/* clear TS bit in CR0		*/
795	subl	$4, %esp		/* make some temporary space	*/
796	fnstsw	(%esp)			/* store the status word	*/
797	fnstcw	2(%esp)			/* store the control word	*/
798	movl	(%esp), %eax		/* put both in %eax		*/
799	addl	$4, %esp
800	ret
801	SET_SIZE(fpgetcwsw)
802
803#endif	/* __i386 */
804#endif  /* __lint */
805
806/*
807 * Returns the MXCSR register.
808 */
809
810#if defined(__lint)
811
812uint32_t
813fpgetmxcsr(void)
814{
815	return (0);
816}
817
818#else   /* __lint */
819
820#if defined(__amd64)
821
822	ENTRY_NP(fpgetmxcsr)
823	pushq	%rbp
824	movq	%rsp, %rbp
825	subq	$0x10, %rsp		/* make some temporary space	*/
826	clts				/* clear TS bit in CR0		*/
827	stmxcsr	(%rsp)			/* get status			*/
828	movl	(%rsp), %eax
829	leave
830	ret
831	SET_SIZE(fpgetmxcsr)
832
833#elif defined(__i386)
834
835	ENTRY_NP(fpgetmxcsr)
836	clts				/* clear TS bit in CR0		*/
837	subl	$4, %esp		/* make some temporary space	*/
838	stmxcsr	(%esp)			/* get status			*/
839	movl	(%esp), %eax
840	addl	$4, %esp
841	ret
842	SET_SIZE(fpgetmxcsr)
843
844#endif	/* __i386 */
845#endif  /* __lint */
846