xref: /titanic_50/usr/src/uts/intel/ia32/ml/float.s (revision 8461248208fabd3a8230615f8615e5bf1b4dcdcb)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License").  You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22/*
23 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24 * Use is subject to license terms.
25 */
26
27/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
28/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
29/*        All Rights Reserved   */
30
31/*      Copyright (c) 1987, 1988 Microsoft Corporation  */
32/*        All Rights Reserved   */
33
34#pragma ident	"%Z%%M%	%I%	%E% SMI"
35
36#include <sys/asm_linkage.h>
37#include <sys/regset.h>
38#include <sys/privregs.h>
39#include <sys/x86_archext.h>
40
41#if defined(__lint)
42#include <sys/types.h>
43#include <sys/fp.h>
44#else
45#include "assym.h"
46#endif
47
48#if defined(__lint)
49
/*
 * Lint-only C definitions of the FPU tunables.  In the non-lint build
 * the same names are emitted as assembly data with the same initial
 * values.
 */
int fpu_exists = 1;		/* zero makes fpu_probe report "no FPU" */
int fp_kind = FP_387;		/* FP_NO, FP_287, FP_387, etc. */
int fpu_ignored = 0;		/* set to 1 by fpu_probe's ignore_fpu path */

/* Set to 1 by fpu_probe once SSE2 has been detected */
int use_sse_pagecopy = 0;
int use_sse_pagezero = 0;
int use_sse_copy = 0;

#if defined(__i386)

/* Set to 1 by the i386 fpu_probe when its FDIV test fails */
int fpu_pentium_fdivbug = 0;

#endif	/* __i386 */
63
64#else	/* __lint */
65
	/*
	 * If fpu_exists is non-zero, fpu_probe will attempt to use any
	 * hardware FPU (subject to other constraints, see below).  If
	 * fpu_exists is zero, fpu_probe will report that there is no
	 * FPU even if there is one.  fpu_probe also clears it when it
	 * ends up on its "no FPU" path.
	 */
	DGDEF3(fpu_exists, 4, 4)
	.long	1

	/*
	 * Kind of FPU in use.  Starts out as FP_387; fpu_probe overwrites
	 * it with the value matching what it detected.
	 */
	DGDEF3(fp_kind, 4, 4)
	.long	FP_387		/* FP_NO, FP_287, FP_387, etc. */

	/*
	 * The variable fpu_ignored is provided to allow other code to
	 * determine whether emulation is being done because there is
	 * no FPU or because of an override requested via /etc/system.
	 */
	DGDEF3(fpu_ignored, 4, 4)
	.long	0

	/*
	 * Used by ppcopy, ppzero, and xcopyin to determine whether or not
	 * to use the SSE-based routines.  All three start at zero and are
	 * set to 1 by fpu_probe once SSE2 support has been confirmed.
	 */
	DGDEF3(use_sse_pagecopy, 4, 4)
	.long	0

	DGDEF3(use_sse_pagezero, 4, 4)
	.long	0

	DGDEF3(use_sse_copy, 4, 4)
	.long	0
98
#if defined(__i386)

	/*
	 * fpu_pentium_fdivbug is exported so that other code can find out
	 * whether the system contains a Pentium with the FDIV problem.
	 * It starts at zero; fpu_probe sets it to 1 when its divide test
	 * fails.
	 */
	DGDEF3(fpu_pentium_fdivbug, 4, 4)
	.long	0

	/*
	 * Operands for the Pentium divide bug test in fpu_probe.  Each one
	 * is a 64-bit floating point value laid down as two 32-bit words,
	 * low word first; fpu_probe reads them with fldl and fdivl.
	 */
	.align	4
num1:	.long	0xbce4217d, 0x4013ffff	/* 4.999999 */
num2:	.long	0x0, 0x402e0000		/* 15.0 */
num3:	.long	0xde7210bf, 0x402dffff	/* 14.999999 */

#endif	/* __i386 */
122#endif	/* __lint */
123
124/*
125 * FPU probe - check if we have any FP chip present by trying to do a reset.
126 * If that succeeds, differentiate via cr0. Called from autoconf.
127 */
128
129#if defined(__lint)
130
/*
 * Lint-only stub for fpu_probe.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
/*ARGSUSED*/
void
fpu_probe(void)
{}
135
136#else	/* __lint */
137
138#if defined(__amd64)
139
140	ENTRY_NP(fpu_probe)
141	pushq	%rbp
142	movq	%rsp, %rbp
143	clts				/* clear task switched bit in CR0 */
144	fninit				/* initialize chip */
145	fnstsw	%ax			/* get status */
146	orb	%al, %al		/* status zero? 0 = chip present */
147	jnz	no_fpu_hw
148
149	/*
150	 * Ignore the FPU if fp_exists == 0
151	 */
152	cmpl	$0, fpu_exists(%rip)
153	je	ignore_fpu
154
155	/*
156	 * we have a chip of some sort; use cr0 to differentiate
157	 */
158	movq	%cr0, %rdx		/* check for fpu present flag */
159	testl	$CR0_ET, %edx
160	jz	no_fpu_hw		/* z -> fpu not present */
161	testl	$X86_SSE, x86_feature(%rip)
162	je	no_fpu_hw		/* SSE is utterly required */
163	testl	$X86_SSE2, x86_feature(%rip)
164	je	no_fpu_hw		/* SSE2 too .. */
165	movl	$__FP_SSE, fp_kind(%rip)
166
167	/*
168	 * Tell the processor what we're doing via %cr4
169	 */
170	movq	%cr4, %rax
171	orq	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %rax
172	movq	%rax, %cr4
173
174	/*
175	 * make other CPUs share the same cr4 settings
176	 */
177	orq	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value(%rip)
178
179	/*
180	 * extract the MXCSR_MASK field from our first fxsave
181	 */
182	subq	$FXSAVE_STATE_SIZE, %rsp
183	movl	$0, FXSAVE_STATE_MXCSR_MASK(%rsp)
184	fxsave	(%rsp)
185	movl	FXSAVE_STATE_MXCSR_MASK(%rsp), %eax
186	cmpl	$0, %eax
187	je	1f			/* default mask value set in fpu.c */
188	movl	%eax, sse_mxcsr_mask(%rip) /* override mask set here */
1891:
190	movq	%cr0, %rax
191	andq	$_BITNOT(CR0_TS|CR0_EM), %rdx	/* clear emulate math bit */
192	orq	$_CONST(CR0_MP|CR0_NE), %rdx
193
194	/*
195	 * We have SSE and SSE2 so enable the extensions for
196	 * non-temporal copies and stores.
197	 */
198	movl	$1, use_sse_pagecopy
199	movl	$1, use_sse_pagezero
200	movl	$1, use_sse_copy
201
202	jmp	done
203
204	/*
205	 * Do not use the FPU at all
206	 */
207ignore_fpu:
208	movl	$1, fpu_ignored(%rip)
209
210	/*
211	 * No FPU hardware present
212	 */
213no_fpu_hw:
214	andq	$_BITNOT(CR0_MP), %rdx	/* clear math chip present */
215	orq	$CR0_EM, %rdx		/* set emulate math bit */
216	movl	$FP_NO, fp_kind(%rip)	/* signify that there is no FPU */
217	movl	$0, fpu_exists(%rip)	/* no FPU present */
218	/*
219	 * Disable the XMM-related gorp too, in case the BIOS set them
220	 */
221	movq	%cr4, %rax
222	andq	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), %rax
223	movq	%rax, %cr4
224	andq	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value(%rip)
225
226done:
227	movq	%rdx, %cr0		/* set machine status word */
228	leave
229	ret
230	SET_SIZE(fpu_probe)
231
232#elif defined(__i386)
233
234	ENTRY_NP(fpu_probe)
235	clts				/ clear task switched bit in CR0
236	fninit				/ initialize chip
237	fnstsw	%ax			/ get status
238	orb	%al, %al		/ status zero? 0 = chip present
239	jnz	no_fpu_hw		/ no, use emulator
240/
241/ If there is an FP, look for the Pentium FDIV problem even if we
242/ do not plan to use it.  Set fpu_pentium_fdivbug is a bad FPU is
243/ detected.  Subsequent code can report the result if desired.
244/
245/ If (num1/num2 > num1/num3) the FPU has the FDIV bug.
246/
247	fldl	num1
248	fldl	num2
249	fdivr	%st(1), %st
250	fxch	%st(1)
251	fdivl	num3
252	fcompp
253	fstsw	%ax
254	sahf
255	jae	no_bug
256	movl	$1, fpu_pentium_fdivbug
257no_bug:
258/
259/ Repeat the earlier initialization sequence so that the FPU is left in
260/ the expected state.
261/
262	fninit
263	fnstsw	%ax
264/
265/ Ignore the FPU if fpu_exists == 0
266/
267	cmpl	$0, fpu_exists
268	je	ignore_fpu
269/
270/ Ignore the FPU if it has the Pentium bug
271/
272	cmpl	$0, fpu_pentium_fdivbug
273	jne	ignore_fpu
274/
275/ at this point we know we have a chip of some sort;
276/ use cr0 to differentiate.
277/
278	movl    %cr0, %edx		/ check for 387 present flag
279	testl	$CR0_ET, %edx		/ ...
280	jz	is287			/ z -> 387 not present
281	movl	$FP_387, fp_kind	/ we have a 387 or later chip
282/
283/ clear the "XMM supported" bits in %cr4 in case the BIOS set them
284/ erroneously -- see 4965674
285/
286	movl	%cr4, %eax
287	andl	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
288	movl	%eax, %cr4
289	andl	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value
290
291	testl	$X86_SSE, x86_feature	/ can we do SSE?
292	je	mathchip
293/
294/ aha .. we have an SSE-capable chip
295/
296/ - set fpsave_begin to fpxsave_begin
297/ - hot patch performance critical code to use fxsave/fxrstor directly,
298/   and hot patch membar_producer() to use sfence instead of lock
299/ - tell the processor what we're doing via %cr4
300/ - allow fully fledged #XM exceptions to be generated by SSE/SSE2
301/   (the default mask set in fpinit() disables them)
302/ - determine the mxcsr_mask so we can avoid setting reserved bits
303/
304	movl	$__FP_SSE, fp_kind
305	movl	$fpxsave_begin, %eax
306	movl	%eax, fpsave_begin
307	call	patch_sse
308	mov	%cr4, %eax
309	orl	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
310	mov	%eax, %cr4
311/
312/ make other CPUs share the same cr4 settings
313/
314	orl	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value
315/
316/ extract the MXCSR_MASK field from our first fxsave
317/
318	subl	$FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
319	movl	%esp, %eax
320	addl	$XMM_ALIGN, %eax
321	andl	$_BITNOT(XMM_ALIGN-1), %eax	/* 16-byte alignment */
322	movl	$0, FXSAVE_STATE_MXCSR_MASK(%eax)
323	fxsave	(%eax)
324	movl	FXSAVE_STATE_MXCSR_MASK(%eax), %eax
325	addl	$FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
326	cmpl	$0, %eax
327	je	1f			/ default mask value set in fpu.c
328	movl	%eax, sse_mxcsr_mask	/ override mask set here
3291:	testl	$X86_SSE2, x86_feature	/ can we do SSE2?
330	je	mathchip
331/
332/ aha .. we have an SSE2-capable chip
333/
334/ - enable pagezero and pagecopy using non-temporal instructions
335/ - hot patch membar_consumer() to use lfence instead of lock
336/
337	movl	$1, use_sse_pagecopy	/ will now call hwblkpagecopy
338	movl	$1, use_sse_pagezero	/ will now call hwblkclr
339	movl	$1, use_sse_copy
340	call	patch_sse2
341	jmp	mathchip
342/
343/ No 387; we must have an 80287.
344/
345is287:
346#if !defined(__GNUC_AS__)
347	fsetpm				/ set the 80287 into protected mode
348	movl	$FP_287, fp_kind	/ we have a 287 chip
349#else
350	movl	$FP_NO, fp_kind		/ maybe just explode here instead?
351#endif
352/
353/ We have either a 287, 387, 486 or P5.
354/ Setup cr0 to reflect the FPU hw type.
355/
356mathchip:
357	movl	%cr0, %edx
358	andl	$_BITNOT(CR0_TS|CR0_EM), %edx	/* clear emulate math bit */
359	orl	$_CONST(CR0_MP|CR0_NE), %edx
360	jmp	cont
361
362/ Do not use the FPU
363ignore_fpu:
364	movl	$1, fpu_ignored
365/ No FP hw present.
366no_fpu_hw:
367	movl	%cr0, %edx
368	andl	$_BITNOT(CR0_MP), %edx	/* clear math chip present */
369	movl	$FP_NO, fp_kind		/ signify that there is no FPU
370	movl	$0, fpu_exists		/ no FPU present
371cont:
372	movl	%edx, %cr0		/ set machine status word
373	ret
374	SET_SIZE(fpu_probe)
375
/*
 * HOT_PATCH(srcaddr, dstaddr, size)
 *
 * Walks size bytes starting at srcaddr and, for each one, calls
 * hot_patch_kernel_text with three stack arguments: the current
 * destination address (starting at dstaddr), the source byte
 * zero-extended to 32 bits, and the constant 1 (presumably a byte
 * count -- confirm against hot_patch_kernel_text).
 *
 * Uses %esi as the source cursor, %edi as the destination cursor and
 * %ebx as the remaining-byte counter, and overwrites %eax; it relies
 * on hot_patch_kernel_text preserving %ebx, %esi and %edi.  The macro
 * defines numeric local label 0.
 */
#define	HOT_PATCH(srcaddr, dstaddr, size)	\
	movl	$srcaddr, %esi;			\
	movl	$dstaddr, %edi;			\
	movl	$size, %ebx;			\
0:	movzbl	(%esi), %eax;			\
	pushl	$1;				\
	pushl	%eax;				\
	pushl	%edi;				\
	call	hot_patch_kernel_text;		\
	addl	$12, %esp;			\
	incl	%esi;				\
	incl	%edi;				\
	decl	%ebx;				\
	testl	%ebx, %ebx;			\
	jnz	0b
391
392	/*
393	 * To cope with processors that do not implement fxsave/fxrstor
394	 * instructions, patch hot paths in the kernel to use them only
395	 * when that feature has been detected.
396	 */
397	ENTRY_NP(patch_sse)
398	push	%ebp
399	mov	%esp, %ebp
400	push	%ebx
401	push	%esi
402	push	%edi
403	/
404	/	frstor (%eax); nop	-> fxrstor (%eax)
405	/
406	HOT_PATCH(_fxrstor_eax_insn, _patch_fxrstor_eax, 3)
407	/
408	/	nop; nop; nop		-> ldmxcsr (%ebx)
409	/
410	HOT_PATCH(_ldmxcsr_ebx_insn, _patch_ldmxcsr_ebx, 3)
411	/
412	/	lock; xorl $0, (%esp)	-> sfence; ret
413	/
414	HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
415	pop	%edi
416	pop	%esi
417	pop	%ebx
418	mov	%ebp, %esp
419	pop	%ebp
420	ret
421_fxrstor_eax_insn:			/ see ndptrap_frstor()
422	fxrstor	(%eax)
423_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
424	ldmxcsr	(%ebx)
425_sfence_ret_insn:			/ see membar_producer()
426	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
427	ret
428	SET_SIZE(patch_sse)
429
430	/*
431	 * Ditto, but this time for functions that depend upon SSE2 extensions
432	 */
433	ENTRY_NP(patch_sse2)
434	push	%ebp
435	mov	%esp, %ebp
436	push	%ebx
437	push	%esi
438	push	%edi
439	/
440	/	lock; xorl $0, (%esp)	-> lfence; ret
441	/
442	HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
443	pop	%edi
444	pop	%esi
445	pop	%ebx
446	mov	%ebp, %esp
447	pop	%ebp
448	ret
449_lfence_ret_insn:			/ see membar_consumer()
450	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
451	ret
452	SET_SIZE(patch_sse2)
453
454#endif	/* __i386 */
455#endif	/* __lint */
456
457
/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch; the
 * routine starts the floating point state save operation.
 * The completion of the save is forced by an fwait just before
 * we truly switch contexts.
 */
465
466#if defined(__lint)
467
/*
 * Lint-only stub for fpnsave_begin.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
/*ARGSUSED*/
void
fpnsave_begin(void *arg)
{}
472
/*
 * Lint-only stub for fpxsave_begin.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
/*ARGSUSED*/
void
fpxsave_begin(void *arg)
{}
477
478#else	/* __lint */
479
480#if defined(__amd64)
481
482	ENTRY_NP(fpxsave_begin)
483	movl	FPU_CTX_FPU_FLAGS(%rdi), %edx
484	cmpl	$FPU_EN, %edx
485	jne	1f
486#if FPU_CTX_FPU_REGS != 0
487	addq	FPU_CTX_FPU_REGS, %rdi
488#endif
489	fxsave	(%rdi)
490	fnclex				/* clear pending x87 exceptions */
4911:	ret
492	SET_SIZE(fpxsave_begin)
493
494#elif defined(__i386)
495
496	ENTRY_NP(fpnsave_begin)
497	mov	4(%esp), %eax		/ a struct fpu_ctx *
498	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
499	cmpl	$FPU_EN, %edx
500	jne	1f
501#if FPU_CTX_FPU_REGS != 0
502	addl	FPU_CTX_FPU_REGS, %eax
503#endif
504	fnsave	(%eax)
5051:	ret
506	SET_SIZE(fpnsave_begin)
507
508	ENTRY_NP(fpxsave_begin)
509	mov	4(%esp), %eax		/ a struct fpu_ctx *
510	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
511	cmpl	$FPU_EN, %edx
512	jne	1f
513#if FPU_CTX_FPU_REGS != 0
514	addl	FPU_CTX_FPU_REGS, %eax
515#endif
516	fxsave	(%eax)
517	fnclex				/ Clear pending x87 exceptions
5181:	ret
519	SET_SIZE(fpxsave_begin)
520
521#endif	/* __i386 */
522#endif	/* __lint */
523
524#if defined(__lint)
525
/*
 * Lint-only stub for fpsave.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}
530
/*
 * Lint-only stub for fpxsave.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}
535
536#else	/* __lint */
537
538#if defined(__amd64)
539
	/*
	 * void fpxsave(struct fxsave_state *f)		-- amd64
	 *
	 * In:	%rdi = f, the save area written by fxsave.
	 *
	 * Saves the floating point state to *f, reinitializes the x87
	 * unit, and returns with CR0.TS set.  Clobbers %rax.
	 *
	 * NOTE(review): fxsave needs a 16-byte aligned operand; callers
	 * are presumably responsible for that -- confirm.
	 */
	ENTRY_NP(fpxsave)
	clts				/* clear TS bit in CR0 */
	fxsave	(%rdi)
	fnclex				/* clear pending x87 exceptions */
	fwait				/* wait for completion */
	fninit				/* emulate fnsave: init x87 tags */
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0		/* set TS bit in CR0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)
551
552#elif defined(__i386)
553
554	ENTRY_NP(fpsave)
555	clts				/ clear TS bit in CR0
556	movl	4(%esp), %eax		/ load save address
557	fnsave	(%eax)
558	fwait				/ wait for completion
559	movl	%cr0, %eax
560	orl	$CR0_TS, %eax
561	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
562	ret
563	SET_SIZE(fpsave)
564
565	ENTRY_NP(fpxsave)
566	clts				/ clear TS bit in CR0
567	movl	4(%esp), %eax		/ save address
568	fxsave	(%eax)
569	fnclex				/ Clear pending x87 exceptions
570	fwait				/ wait for completion
571	fninit				/ emulate fnsave: init x87 tag words
572	mov	%cr0, %eax
573	orl	$CR0_TS, %eax
574	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
575	ret
576	SET_SIZE(fpxsave)
577
578#endif	/* __i386 */
579#endif	/* __lint */
580
581#if defined(__lint)
582
/*
 * Lint-only stub for fprestore.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{}
587
/*
 * Lint-only stub for fpxrestore.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}
592
593#else	/* __lint */
594
595#if defined(__amd64)
596
	/*
	 * void fpxrestore(struct fxsave_state *f)	-- amd64
	 *
	 * In:	%rdi = f, the save area read by fxrstor.
	 *
	 * Clears CR0.TS and reloads the floating point state from *f.
	 * TS is left clear on return.
	 */
	ENTRY_NP(fpxrestore)
	clts				/* clear TS bit in CR0 */
	fxrstor	(%rdi)
	ret
	SET_SIZE(fpxrestore)
602
603#elif defined(__i386)
604
605	ENTRY_NP(fprestore)
606	clts				/ clear TS bit in CR0
607	movl	4(%esp), %eax		/ load restore address
608	frstor	(%eax)
609	ret
610	SET_SIZE(fprestore)
611
612	ENTRY_NP(fpxrestore)
613	clts				/ clear TS bit in CR0
614	movl	4(%esp), %eax		/ load restore address
615	fxrstor	(%eax)
616	ret
617	SET_SIZE(fpxrestore)
618
619#endif	/* __i386 */
620#endif	/* __lint */
621
622/*
623 * Disable the floating point unit.
624 */
625
626#if defined(__lint)
627
/*
 * Lint-only stub for fpdisable.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
void
fpdisable(void)
{}
631
632#else	/* __lint */
633
634#if defined(__amd64)
635
	/*
	 * void fpdisable(void)				-- amd64
	 *
	 * Sets the TS bit in %cr0, leaving the other bits as they were.
	 * Clobbers %rax.
	 */
	ENTRY_NP(fpdisable)
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0		/* set TS bit in CR0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)
642
643#elif defined(__i386)
644
	/*
	 * void fpdisable(void)				-- i386
	 *
	 * Sets the TS bit in %cr0, leaving the other bits as they were.
	 * Clobbers %eax.
	 */
	ENTRY_NP(fpdisable)
	movl	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
	ret
	SET_SIZE(fpdisable)
651
652#endif	/* __i386 */
653#endif	/* __lint */
654
655/*
656 * Initialize the fpu hardware.
657 */
658
659#if defined(__lint)
660
/*
 * Lint-only stub for fpinit.  The real routine is the assembly
 * version in the non-lint branch of this conditional.
 */
void
fpinit(void)
{}
664
665#else	/* __lint */
666
667#if defined(__amd64)
668
669	ENTRY_NP(fpinit)
670	clts				/* clear TS bit in CR0 */
671	leaq	sse_initial(%rip), %rax
672	fxrstor	(%rax)			/* load clean initial state */
673	ret
674	SET_SIZE(fpinit)
675
676#elif defined(__i386)
677
678	ENTRY_NP(fpinit)
679	clts				/ clear TS bit in CR0
680	cmpl	$__FP_SSE, fp_kind
681	je	1f
682
683	fninit				/ initialize the chip
684	movl	$x87_initial, %eax
685	frstor	(%eax)			/ load clean initial state
686	ret
6871:
688	movl	$sse_initial, %eax
689	fxrstor	(%eax)			/ load clean initial state
690	ret
691	SET_SIZE(fpinit)
692
693#endif	/* __i386 */
694#endif	/* __lint */
695
696/*
697 * Clears FPU exception state.
698 * Returns the FP status word.
699 */
700
701#if defined(__lint)
702
/*
 * Lint-only stub for fperr_reset; always returns 0.  The real routine
 * is the assembly version in the non-lint branch of this conditional.
 */
uint32_t
fperr_reset(void)
{
	return (0);
}
708
/*
 * Lint-only stub for fpxerr_reset; always returns 0.  The real routine
 * is the assembly version in the non-lint branch of this conditional.
 */
uint32_t
fpxerr_reset(void)
{
	return (0);
}
714
715#else	/* __lint */
716
717#if defined(__amd64)
718
719	ENTRY_NP(fperr_reset)
720	xorl	%eax, %eax
721	clts				/* clear TS bit in CR0 */
722	fnstsw	%ax			/* get status */
723	fnclex				/* clear processor exceptions */
724	ret
725	SET_SIZE(fperr_reset)
726
727	ENTRY_NP(fpxerr_reset)
728	pushq	%rbp
729	movq	%rsp, %rbp
730	subq	$0x10, %rsp		/* make some temporary space */
731	clts				/* clear TS bit in CR0 */
732	stmxcsr	(%rsp)			/* get status */
733	movl	(%rsp), %eax
734	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
735	ldmxcsr	(%rsp)			/* clear processor exceptions */
736	leave
737	ret
738	SET_SIZE(fpxerr_reset)
739
740#elif defined(__i386)
741
742	ENTRY_NP(fperr_reset)
743	xorl	%eax, %eax
744	clts				/ clear TS bit in CR0
745	fnstsw	%ax			/ get status
746	fnclex				/ clear processor exceptions
747	ret
748	SET_SIZE(fperr_reset)
749
750	ENTRY_NP(fpxerr_reset)
751	clts				/ clear TS bit in CR0
752	subl	$4, %esp		/ make some temporary space
753	stmxcsr	(%esp)			/ get status
754	movl	(%esp), %eax
755	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
756	ldmxcsr	(%esp)			/ clear processor exceptions
757	addl	$4, %esp
758	ret
759	SET_SIZE(fpxerr_reset)
760
761#endif	/* __i386 */
762#endif	/* __lint */
763
764#if defined(__lint)
765
/*
 * Lint-only stub for fpgetcwsw; always returns 0.  The real routine
 * is the assembly version in the non-lint branch of this conditional.
 */
uint32_t
fpgetcwsw(void)
{
	return (0);
}
771
772#else   /* __lint */
773
774#if defined(__amd64)
775
776	ENTRY_NP(fpgetcwsw)
777	pushq	%rbp
778	movq	%rsp, %rbp
779	subq	$0x10, %rsp		/* make some temporary space	*/
780	clts				/* clear TS bit in CR0		*/
781	fnstsw	(%rsp)			/* store the status word	*/
782	fnstcw	2(%rsp)			/* store the control word	*/
783	movl	(%rsp), %eax		/* put both in %eax		*/
784	leave
785	ret
786	SET_SIZE(fpgetcwsw)
787
788#elif defined(__i386)
789
790	ENTRY_NP(fpgetcwsw)
791	clts				/* clear TS bit in CR0		*/
792	subl	$4, %esp		/* make some temporary space	*/
793	fnstsw	(%esp)			/* store the status word	*/
794	fnstcw	2(%esp)			/* store the control word	*/
795	movl	(%esp), %eax		/* put both in %eax		*/
796	addl	$4, %esp
797	ret
798	SET_SIZE(fpgetcwsw)
799
800#endif	/* __i386 */
801#endif  /* __lint */
802
803/*
804 * Returns the MXCSR register.
805 */
806
807#if defined(__lint)
808
/*
 * Lint-only stub for fpgetmxcsr; always returns 0.  The real routine
 * is the assembly version in the non-lint branch of this conditional.
 */
uint32_t
fpgetmxcsr(void)
{
	return (0);
}
814
815#else   /* __lint */
816
817#if defined(__amd64)
818
819	ENTRY_NP(fpgetmxcsr)
820	pushq	%rbp
821	movq	%rsp, %rbp
822	subq	$0x10, %rsp		/* make some temporary space	*/
823	clts				/* clear TS bit in CR0		*/
824	stmxcsr	(%rsp)			/* get status			*/
825	movl	(%rsp), %eax
826	leave
827	ret
828	SET_SIZE(fpgetmxcsr)
829
830#elif defined(__i386)
831
832	ENTRY_NP(fpgetmxcsr)
833	clts				/* clear TS bit in CR0		*/
834	subl	$4, %esp		/* make some temporary space	*/
835	stmxcsr	(%esp)			/* get status			*/
836	movl	(%esp), %eax
837	addl	$4, %esp
838	ret
839	SET_SIZE(fpgetmxcsr)
840
841#endif	/* __i386 */
842#endif  /* __lint */
843