xref: /titanic_50/usr/src/uts/intel/ia32/ml/float.s (revision b3700b074e637f8c6991b70754c88a2cfffb246b)
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22/*
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
24 */
25
26/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
27/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
28/*        All Rights Reserved   */
29
30/*      Copyright (c) 1987, 1988 Microsoft Corporation  */
31/*        All Rights Reserved   */
32
33/*
34 * Copyright (c) 2009, Intel Corporation.
35 * All rights reserved.
36 */
37
38#include <sys/asm_linkage.h>
39#include <sys/asm_misc.h>
40#include <sys/regset.h>
41#include <sys/privregs.h>
42#include <sys/x86_archext.h>
43
44#if defined(__lint)
45#include <sys/types.h>
46#include <sys/fp.h>
47#else
48#include "assym.h"
49#endif
50
51#if defined(__lint)
52
53uint_t
54fpu_initial_probe(void)
55{ return (0); }
56
57#else	/* __lint */
58
59	/*
60	 * Returns zero if x87 "chip" is present(!)
61	 */
62	ENTRY_NP(fpu_initial_probe)
63	CLTS
64	fninit
65	fnstsw	%ax
66	movzbl	%al, %eax
67	ret
68	SET_SIZE(fpu_initial_probe)
69
70#endif	/* __lint */
71
72#if defined(__lint)
73
/*
 * Lint-only stub; the real fxsave_insn() is the assembly that follows.
 */
/*ARGSUSED*/
void
fxsave_insn(struct fxsave_state *fx)
{}
78
79#else	/* __lint */
80
81#if defined(__amd64)
82
	/*
	 * void fxsave_insn(struct fxsave_state *fx)
	 *
	 * amd64: applies the FXSAVEQ macro to the buffer whose address
	 * arrives in %rdi and returns.  No CLTS/STTS is issued here.
	 */
	ENTRY_NP(fxsave_insn)
	FXSAVEQ	((%rdi))
	ret
	SET_SIZE(fxsave_insn)
87
88#elif defined(__i386)
89
90	ENTRY_NP(fxsave_insn)
91	movl	4(%esp), %eax
92	fxsave	(%eax)
93	ret
94	SET_SIZE(fxsave_insn)
95
96#endif
97
98#endif	/* __lint */
99
100#if defined(__i386)
101
102/*
103 * If (num1/num2 > num1/num3) the FPU has the FDIV bug.
104 */
105
106#if defined(__lint)
107
/*
 * Lint-only stand-in for the assembly routine of the same name.
 */
int
fpu_probe_pentium_fdivbug(void)
{
	return (0);
}
111
112#else	/* __lint */
113
114	ENTRY_NP(fpu_probe_pentium_fdivbug)
115	fldl	.num1
116	fldl	.num2
117	fdivr	%st(1), %st
118	fxch	%st(1)
119	fdivl	.num3
120	fcompp
121	fstsw	%ax
122	sahf
123	jae	0f
124	movl	$1, %eax
125	ret
126
1270:	xorl	%eax, %eax
128	ret
129
130	.align	4
131.num1:	.4byte	0xbce4217d	/* 4.999999 */
132	.4byte	0x4013ffff
133.num2:	.4byte	0x0		/* 15.0 */
134	.4byte	0x402e0000
135.num3:	.4byte	0xde7210bf	/* 14.999999 */
136	.4byte	0x402dffff
137	SET_SIZE(fpu_probe_pentium_fdivbug)
138
139#endif	/* __lint */
140
141/*
142 * To cope with processors that do not implement fxsave/fxrstor
143 * instructions, patch hot paths in the kernel to use them only
144 * when that feature has been detected.
145 */
146
147#if defined(__lint)
148
/*
 * Lint-only stubs for the three i386 hot-patch routines implemented in
 * assembly below.
 */
void
patch_sse(void)
{}

void
patch_sse2(void)
{}

void
patch_xsave(void)
{}
160
161#else	/* __lint */
162
	/*
	 * void patch_sse(void)
	 *
	 * Applies two hot patches through the _HOT_PATCH* macros (defined
	 * elsewhere): 3 bytes at _patch_fxrstor_ebx and 4 bytes at
	 * _patch_sfence_ret, each copied from the template of the same
	 * name below.
	 *
	 * The labelled instructions after the "ret" are never executed in
	 * place; they only supply the replacement bytes.
	 * _ldmxcsr_ebx_insn is not passed to any _HOT_PATCH in this
	 * routine.
	 */
	ENTRY_NP(patch_sse)
	_HOT_PATCH_PROLOG
	/
	/	frstor (%ebx); nop	-> fxrstor (%ebx)
	/
	_HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
	/
	/	lock; xorl $0, (%esp)	-> sfence; ret
	/
	_HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
	_HOT_PATCH_EPILOG
	ret
_fxrstor_ebx_insn:			/ see ndptrap_frstor()
	fxrstor	(%ebx)
_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
	ldmxcsr	(%ebx)
_sfence_ret_insn:			/ see membar_producer()
	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
	ret
	SET_SIZE(patch_sse)
183
	/*
	 * void patch_sse2(void)
	 *
	 * Hot-patches 4 bytes at _patch_lfence_ret with the template below
	 * (hand-encoded lfence followed by ret).  The template after the
	 * routine's own "ret" is data for the patch, not a fall-through path.
	 */
	ENTRY_NP(patch_sse2)
	_HOT_PATCH_PROLOG
	/
	/	lock; xorl $0, (%esp)	-> lfence; ret
	/
	_HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
	_HOT_PATCH_EPILOG
	ret
_lfence_ret_insn:			/ see membar_consumer()
	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
	ret
	SET_SIZE(patch_sse2)
196
	/*
	 * void patch_xsave(void)		(i386)
	 *
	 * Patch lazy fp restore instructions in the trap handler
	 * to use xrstor instead of frstor
	 *
	 * Three bytes at _patch_xrstor_ebx are replaced with the
	 * hand-encoded "xrstor (%ebx)" template (0f ae 2b) that follows
	 * the routine's "ret".
	 */
	ENTRY_NP(patch_xsave)
	_HOT_PATCH_PROLOG
	/
	/	frstor (%ebx); nop	-> xrstor (%ebx)
	/
	_HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3)
	_HOT_PATCH_EPILOG
	ret
_xrstor_ebx_insn:			/ see ndptrap_frstor()
	#xrstor (%ebx)
	.byte	0x0f, 0xae, 0x2b
	SET_SIZE(patch_xsave)
213
214#endif	/* __lint */
215#endif	/* __i386 */
216
217#if defined(__amd64)
218#if defined(__lint)
219
/*
 * Lint-only stub; the amd64 patch_xsave() is the assembly that follows.
 */
void
patch_xsave(void)
{}
223
224#else	/* __lint */
225
226	/*
227	 * Patch lazy fp restore instructions in the trap handler
228	 * to use xrstor instead of fxrstorq
229	 */
230	ENTRY_NP(patch_xsave)
231	pushq	%rbx
232	pushq	%rbp
233	pushq	%r15
234	/
235	/	FXRSTORQ (%rbx);	-> xrstor (%rbx)
236	/ hot_patch(_xrstor_rbx_insn, _patch_xrstorq_rbx, 4)
237	/
238	leaq	_patch_xrstorq_rbx(%rip), %rbx
239	leaq	_xrstor_rbx_insn(%rip), %rbp
240	movq	$4, %r15
2411:
242	movq	%rbx, %rdi			/* patch address */
243	movzbq	(%rbp), %rsi			/* instruction byte */
244	movq	$1, %rdx			/* count */
245	call	hot_patch_kernel_text
246	addq	$1, %rbx
247	addq	$1, %rbp
248	subq	$1, %r15
249	jnz	1b
250
251	popq	%r15
252	popq	%rbp
253	popq	%rbx
254	ret
255
256_xrstor_rbx_insn:			/ see ndptrap_frstor()
257	#rex.W=1 (.byte 0x48)
258	#xrstor (%rbx)
259	.byte	0x48, 0x0f, 0xae, 0x2b
260	SET_SIZE(patch_xsave)
261
262#endif	/* __lint */
263#endif	/* __amd64 */
264
265/*
266 * One of these routines is called from any lwp with floating
267 * point context as part of the prolog of a context switch.
268 */
269
270#if defined(__lint)
271
/*
 * Lint-only stubs for the context-switch save routines implemented in
 * assembly below.
 */
/*ARGSUSED*/
void
xsave_ctxt(void *arg)
{}

/*ARGSUSED*/
void
fpxsave_ctxt(void *arg)
{}

/*ARGSUSED*/
void
fpnsave_ctxt(void *arg)
{}
286
287#else	/* __lint */
288
289#if defined(__amd64)
290
	/*
	 * void fpxsave_ctxt(void *arg)		(amd64, fxsave mechanism)
	 *
	 * In:	%rdi = arg, accessed through the FPU_CTX_* offsets.
	 *
	 * Returns at once unless the flags word is exactly FPU_EN.
	 * Otherwise the flags become FPU_VALID|FPU_EN, the register image
	 * is stored with FXSAVEQ, the exception pointers are scrubbed (see
	 * below) and TS is set.  %rsi is used as STTS scratch.
	 *
	 * NOTE(review): the ES-bit test reads FXSAVE_STATE_FSW relative to
	 * %rdi while the image was stored at FPU_CTX_FPU_REGS(%rdi); these
	 * agree only if FPU_CTX_FPU_REGS is 0 -- confirm against assym.h.
	 */
	ENTRY_NP(fpxsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	FXSAVEQ	(FPU_CTX_FPU_REGS(%rdi))

	/*
	 * On certain AMD processors, the "exception pointers" i.e. the last
	 * instruction pointer, last data pointer, and last opcode
	 * are saved by the fxsave instruction ONLY if the exception summary
	 * bit is set.
	 *
	 * To ensure that we don't leak these values into the next context
	 * on the cpu, we could just issue an fninit here, but that's
	 * rather slow and so we issue an instruction sequence that
	 * clears them more quickly, if a little obscurely.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)
319
320	ENTRY_NP(xsave_ctxt)
321	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
322	jne	1f
323	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
324	/*
325	 * Setup xsave flags in EDX:EAX
326	 */
327	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
328	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
329	leaq	FPU_CTX_FPU_REGS(%rdi), %rsi
330	#xsave	(%rsi)
331	.byte	0x0f, 0xae, 0x26
332
333	/*
334	 * (see notes above about "exception pointers")
335	 * TODO: does it apply to any machine that uses xsave?
336	 */
337	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
338	jnc	0f				/* jump if ES = 0 */
339	fnclex		/* clear pending x87 exceptions */
3400:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
341	fildl	.fpzero_const(%rip)
342			/* dummy load changes all exception pointers */
343	STTS(%rsi)	/* trap on next fpu touch */
3441:	ret
345	SET_SIZE(xsave_ctxt)
346
347#elif defined(__i386)
348
	/*
	 * void fpnsave_ctxt(void *arg)		(i386, fnsave mechanism)
	 *
	 * arg is the first stack argument.  Returns at once unless the
	 * flags word is exactly FPU_EN; otherwise marks the context
	 * FPU_VALID|FPU_EN, stores the x87 state with fnsave and sets TS.
	 * %edx is used as STTS scratch.
	 */
	ENTRY_NP(fpnsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fnsave	FPU_CTX_FPU_REGS(%eax)
			/* (fnsave also reinitializes x87 state) */
	STTS(%edx)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpnsave_ctxt)
361
	/*
	 * void fpxsave_ctxt(void *arg)		(i386, fxsave mechanism)
	 *
	 * arg is the first stack argument.  Returns at once unless the
	 * flags word is exactly FPU_EN; otherwise marks the context
	 * FPU_VALID|FPU_EN, stores the state with fxsave, scrubs the x87
	 * exception pointers and sets TS (%edx is STTS scratch).
	 *
	 * NOTE(review): the ES-bit test reads FXSAVE_STATE_FSW relative to
	 * %eax while the image was stored at FPU_CTX_FPU_REGS(%eax); these
	 * agree only if FPU_CTX_FPU_REGS is 0 -- confirm against assym.h.
	 */
	ENTRY_NP(fpxsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fxsave	FPU_CTX_FPU_REGS(%eax)
			/* (see notes above about "exception pointers") */
	btw	$7, FXSAVE_STATE_FSW(%eax)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const
			/* dummy load changes all exception pointers */
	STTS(%edx)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)
380
	/*
	 * void xsave_ctxt(void *arg)		(i386, xsave mechanism)
	 *
	 * arg is the first stack argument.  Returns at once unless the
	 * flags word is exactly FPU_EN.  Otherwise marks the context
	 * FPU_VALID|FPU_EN, loads the 64-bit xsave mask into %edx:%eax,
	 * repoints %ecx at the register save area and executes the
	 * hand-encoded xsave (%ecx).  From there on %ecx addresses the
	 * save area, not the context, so the ES-bit test below reads the
	 * image just written.  %edx is reused as STTS scratch.
	 */
	ENTRY_NP(xsave_ctxt)
	movl	4(%esp), %ecx		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
	movl	FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
	leal	FPU_CTX_FPU_REGS(%ecx), %ecx
	#xsave	(%ecx)
	.byte	0x0f, 0xae, 0x21

	/*
	 * (see notes above about "exception pointers")
	 * TODO: does it apply to any machine that uses xsave?
	 */
	btw	$7, FXSAVE_STATE_FSW(%ecx)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const
			/* dummy load changes all exception pointers */
	STTS(%edx)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)
406
407#endif	/* __i386 */
408
	/*
	 * Zero-valued operand for the dummy fildl issued by the *_ctxt
	 * save routines above.
	 */
	.align	8
.fpzero_const:
	.4byte	0x0
	.4byte	0x0
413
414#endif	/* __lint */
415
416
417#if defined(__lint)
418
/*
 * Lint-only stubs for the explicit save routines implemented in
 * assembly below.
 */
/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}

/*ARGSUSED*/
void
xsave(struct xsave_state *f, uint64_t m)
{}
433
434#else	/* __lint */
435
436#if defined(__amd64)
437
	/*
	 * void fpxsave(struct fxsave_state *f)		(amd64)
	 *
	 * In:	%rdi = f.
	 * Stores the state at f with FXSAVEQ, reinitializes the x87 with
	 * fninit and sets TS.  %rdi is reused as STTS scratch once the
	 * pointer is no longer needed.
	 */
	ENTRY_NP(fpxsave)
	CLTS
	FXSAVEQ	((%rdi))
	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)
445
	/*
	 * void xsave(struct xsave_state *f, uint64_t m)	(amd64)
	 *
	 * In:	%rdi = f, %rsi = m.
	 * Splits m into %edx:%eax (high:low), executes the hand-encoded
	 * xsave (%rdi), reinitializes the x87 with fninit and sets TS.
	 * Clobbers %rax, %rdx; %rdi is reused as STTS scratch.
	 */
	ENTRY_NP(xsave)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	#xsave	(%rdi)
	.byte	0x0f, 0xae, 0x27

	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)
458
459#elif defined(__i386)
460
461	ENTRY_NP(fpsave)
462	CLTS
463	movl	4(%esp), %eax
464	fnsave	(%eax)
465	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
466	ret
467	SET_SIZE(fpsave)
468
469	ENTRY_NP(fpxsave)
470	CLTS
471	movl	4(%esp), %eax
472	fxsave	(%eax)
473	fninit				/* clear exceptions, init x87 tags */
474	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
475	ret
476	SET_SIZE(fpxsave)
477
	/*
	 * void xsave(struct xsave_state *f, uint64_t m)	(i386)
	 *
	 * Stack arguments: 4(%esp) = f, 8(%esp) = low half of m,
	 * 12(%esp) = high half of m.  The mask goes into %edx:%eax, the
	 * hand-encoded xsave (%ecx) stores the state, fninit reinitializes
	 * the x87 and TS is set (%eax is reused as STTS scratch).
	 */
	ENTRY_NP(xsave)
	CLTS
	movl	4(%esp), %ecx
	movl	8(%esp), %eax
	movl	12(%esp), %edx
	#xsave	(%ecx)
	.byte	0x0f, 0xae, 0x21

	fninit				/* clear exceptions, init x87 tags */
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)
490
491#endif	/* __i386 */
492#endif	/* __lint */
493
494#if defined(__lint)
495
/*
 * Lint-only stubs for the restore routines implemented in assembly
 * below.
 */
/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}

/*ARGSUSED*/
void
xrestore(struct xsave_state *f, uint64_t m)
{}
510
511#else	/* __lint */
512
513#if defined(__amd64)
514
	/*
	 * void fpxrestore(struct fxsave_state *f)	(amd64)
	 *
	 * In:	%rdi = f.
	 * Issues CLTS, then loads the state at f with FXRSTORQ.  TS is not
	 * set again before returning.
	 */
	ENTRY_NP(fpxrestore)
	CLTS
	FXRSTORQ	((%rdi))
	ret
	SET_SIZE(fpxrestore)
520
	/*
	 * void xrestore(struct xsave_state *f, uint64_t m)	(amd64)
	 *
	 * In:	%rdi = f, %rsi = m.
	 * Splits m into %edx:%eax (high:low) and executes the hand-encoded
	 * xrstor (%rdi).  Clobbers %rax, %rdx.  TS is not set again.
	 */
	ENTRY_NP(xrestore)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx
	#xrstor	(%rdi)
	.byte	0x0f, 0xae, 0x2f
	ret
	SET_SIZE(xrestore)
530
531#elif defined(__i386)
532
533	ENTRY_NP(fprestore)
534	CLTS
535	movl	4(%esp), %eax
536	frstor	(%eax)
537	ret
538	SET_SIZE(fprestore)
539
540	ENTRY_NP(fpxrestore)
541	CLTS
542	movl	4(%esp), %eax
543	fxrstor	(%eax)
544	ret
545	SET_SIZE(fpxrestore)
546
	/*
	 * void xrestore(struct xsave_state *f, uint64_t m)	(i386)
	 *
	 * Stack arguments: 4(%esp) = f, 8(%esp) = low half of m,
	 * 12(%esp) = high half of m.  Loads the mask into %edx:%eax and
	 * executes the hand-encoded xrstor (%ecx).  TS is not set again.
	 */
	ENTRY_NP(xrestore)
	CLTS
	movl	4(%esp), %ecx
	movl	8(%esp), %eax
	movl	12(%esp), %edx
	#xrstor	(%ecx)
	.byte	0x0f, 0xae, 0x29
	ret
	SET_SIZE(xrestore)
556
557#endif	/* __i386 */
558#endif	/* __lint */
559
560/*
561 * Disable the floating point unit.
562 */
563
564#if defined(__lint)
565
/*
 * Lint-only stub; the real fpdisable() is the assembly that follows.
 */
void
fpdisable(void)
{}
569
570#else	/* __lint */
571
572#if defined(__amd64)
573
	/*
	 * void fpdisable(void)		(amd64)
	 *
	 * Sets TS via the STTS macro, with %rdi as its scratch register.
	 */
	ENTRY_NP(fpdisable)
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)
578
579#elif defined(__i386)
580
	/*
	 * void fpdisable(void)		(i386)
	 *
	 * Sets TS via the STTS macro, with %eax as its scratch register.
	 */
	ENTRY_NP(fpdisable)
	STTS(%eax)
	ret
	SET_SIZE(fpdisable)
585
586#endif	/* __i386 */
587#endif	/* __lint */
588
589/*
590 * Initialize the fpu hardware.
591 */
592
593#if defined(__lint)
594
/*
 * Lint-only stub; the real fpinit() is the assembly that follows.
 */
void
fpinit(void)
{}
598
599#else	/* __lint */
600
601#if defined(__amd64)
602
603	ENTRY_NP(fpinit)
604	CLTS
605	cmpl	$FP_XSAVE, fp_save_mech
606	je	1f
607
608	/* fxsave */
609	leaq	sse_initial(%rip), %rax
610	FXRSTORQ	((%rax))		/* load clean initial state */
611	ret
612
6131:	/* xsave */
614	leaq	avx_initial(%rip), %rcx
615	xorl	%edx, %edx
616	movl	$XFEATURE_AVX, %eax
617	bt	$X86FSET_AVX, x86_featureset
618	cmovael	%edx, %eax
619	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
620	/* xrstor (%rcx) */
621	.byte	0x0f, 0xae, 0x29		/* load clean initial state */
622	ret
623	SET_SIZE(fpinit)
624
625#elif defined(__i386)
626
627	ENTRY_NP(fpinit)
628	CLTS
629	cmpl	$FP_FXSAVE, fp_save_mech
630	je	1f
631	cmpl	$FP_XSAVE, fp_save_mech
632	je	2f
633
634	/* fnsave */
635	fninit
636	movl	$x87_initial, %eax
637	frstor	(%eax)			/* load clean initial state */
638	ret
639
6401:	/* fxsave */
641	movl	$sse_initial, %eax
642	fxrstor	(%eax)			/* load clean initial state */
643	ret
644
6452:	/* xsave */
646	movl	$avx_initial, %ecx
647	xorl	%edx, %edx
648	movl	$XFEATURE_AVX, %eax
649	bt	$X86FSET_AVX, x86_featureset
650	cmovael	%edx, %eax
651	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
652	/* xrstor (%ecx) */
653	.byte	0x0f, 0xae, 0x29	/* load clean initial state */
654	ret
655	SET_SIZE(fpinit)
656
657#endif	/* __i386 */
658#endif	/* __lint */
659
660/*
661 * Clears FPU exception state.
662 * Returns the FP status word.
663 */
664
665#if defined(__lint)
666
/*
 * Lint-only stand-ins for the two assembly routines of the same names.
 */
uint32_t
fperr_reset(void)
{
	return (0);
}

uint32_t
fpxerr_reset(void)
{
	return (0);
}
674
675#else	/* __lint */
676
677#if defined(__amd64)
678
679	ENTRY_NP(fperr_reset)
680	CLTS
681	xorl	%eax, %eax
682	fnstsw	%ax
683	fnclex
684	ret
685	SET_SIZE(fperr_reset)
686
687	ENTRY_NP(fpxerr_reset)
688	pushq	%rbp
689	movq	%rsp, %rbp
690	subq	$0x10, %rsp		/* make some temporary space */
691	CLTS
692	stmxcsr	(%rsp)
693	movl	(%rsp), %eax
694	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
695	ldmxcsr	(%rsp)			/* clear processor exceptions */
696	leave
697	ret
698	SET_SIZE(fpxerr_reset)
699
700#elif defined(__i386)
701
702	ENTRY_NP(fperr_reset)
703	CLTS
704	xorl	%eax, %eax
705	fnstsw	%ax
706	fnclex
707	ret
708	SET_SIZE(fperr_reset)
709
710	ENTRY_NP(fpxerr_reset)
711	CLTS
712	subl	$4, %esp		/* make some temporary space */
713	stmxcsr	(%esp)
714	movl	(%esp), %eax
715	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
716	ldmxcsr	(%esp)			/* clear processor exceptions */
717	addl	$4, %esp
718	ret
719	SET_SIZE(fpxerr_reset)
720
721#endif	/* __i386 */
722#endif	/* __lint */
723
724#if defined(__lint)
725
/*
 * Lint-only stand-in for the assembly routine of the same name.
 */
uint32_t
fpgetcwsw(void)
{ return (0); }
731
732#else   /* __lint */
733
734#if defined(__amd64)
735
736	ENTRY_NP(fpgetcwsw)
737	pushq	%rbp
738	movq	%rsp, %rbp
739	subq	$0x10, %rsp		/* make some temporary space	*/
740	CLTS
741	fnstsw	(%rsp)			/* store the status word	*/
742	fnstcw	2(%rsp)			/* store the control word	*/
743	movl	(%rsp), %eax		/* put both in %eax		*/
744	leave
745	ret
746	SET_SIZE(fpgetcwsw)
747
748#elif defined(__i386)
749
750	ENTRY_NP(fpgetcwsw)
751	CLTS
752	subl	$4, %esp		/* make some temporary space	*/
753	fnstsw	(%esp)			/* store the status word	*/
754	fnstcw	2(%esp)			/* store the control word	*/
755	movl	(%esp), %eax		/* put both in %eax		*/
756	addl	$4, %esp
757	ret
758	SET_SIZE(fpgetcwsw)
759
760#endif	/* __i386 */
761#endif  /* __lint */
762
763/*
764 * Returns the MXCSR register.
765 */
766
767#if defined(__lint)
768
/*
 * Lint-only stand-in for the assembly routine of the same name.
 */
uint32_t
fpgetmxcsr(void)
{ return (0); }
774
775#else   /* __lint */
776
777#if defined(__amd64)
778
779	ENTRY_NP(fpgetmxcsr)
780	pushq	%rbp
781	movq	%rsp, %rbp
782	subq	$0x10, %rsp		/* make some temporary space */
783	CLTS
784	stmxcsr	(%rsp)
785	movl	(%rsp), %eax
786	leave
787	ret
788	SET_SIZE(fpgetmxcsr)
789
790#elif defined(__i386)
791
792	ENTRY_NP(fpgetmxcsr)
793	CLTS
794	subl	$4, %esp		/* make some temporary space */
795	stmxcsr	(%esp)
796	movl	(%esp), %eax
797	addl	$4, %esp
798	ret
799	SET_SIZE(fpgetmxcsr)
800
801#endif	/* __i386 */
802#endif  /* __lint */
803