/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
/*        All Rights Reserved   */

/*      Copyright (c) 1987, 1988 Microsoft Corporation  */
/*        All Rights Reserved   */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

#include <sys/asm_linkage.h>
#include <sys/regset.h>
#include <sys/privregs.h>
#include <sys/x86_archext.h>

#if defined(__lint)
#include <sys/types.h>
#include <sys/fp.h>
#else
#include "assym.h"
#endif

#if defined(__lint)

/* Lint-visible C stand-ins for the assembler data definitions below. */

int fpu_exists = 1;		/* zeroed (e.g. via /etc/system) to disable the FPU */
int fp_kind = FP_387;		/* FP_NO, FP_287, FP_387, __FP_SSE, ... */
int fpu_ignored = 0;		/* set when emulation was forced despite hardware */

int use_sse_pagecopy = 0;	/* enable SSE-based ppcopy/ppzero/xcopyin paths */
int use_sse_pagezero = 0;
int use_sse_copy = 0;

#if defined(__i386)

int fpu_pentium_fdivbug = 0;	/* set if the Pentium FDIV erratum is detected */

#endif

#else	/* __lint */

	/*
	 * If fpu_exists is non-zero, fpu_probe will attempt to use any
	 * hardware FPU (subject to other constraints, see below).  If
	 * fpu_exists is zero, fpu_probe will report that there is no
	 * FPU even if there is one.
	 */
	DGDEF3(fpu_exists, 4, 4)
	.long	1

	DGDEF3(fp_kind, 4, 4)
	.long	FP_387		/* FP_NO, FP_287, FP_387, etc. */

	/*
	 * The variable fpu_ignored is provided to allow other code to
	 * determine whether emulation is being done because there is
	 * no FPU or because of an override requested via /etc/system.
	 */
	DGDEF3(fpu_ignored, 4, 4)
	.long	0

	/*
	 * Used by ppcopy, ppzero, and xcopyin to determine whether or not
	 * to use the SSE-based routines
	 */
	DGDEF3(use_sse_pagecopy, 4, 4)
	.long	0

	DGDEF3(use_sse_pagezero, 4, 4)
	.long	0

	DGDEF3(use_sse_copy, 4, 4)
	.long	0

#if defined(__i386)

	/*
	 * The variable fpu_pentium_fdivbug is provided to allow other code to
	 * determine whether the system contains a Pentium with the FDIV
	 * problem.
	 */
	DGDEF3(fpu_pentium_fdivbug, 4, 4)
	.long	0

	/*
	 * The following constants are used for detecting the Pentium
	 * divide bug.  Each is a little-endian IEEE double (low word
	 * first); a buggy FPU computes num1/num2 < num1/num3.
	 */
	.align	4
num1:	.4byte	0xbce4217d	/* 4.999999 */
	.4byte	0x4013ffff
num2:	.4byte	0x0		/* 15.0 */
	.4byte	0x402e0000
num3:	.4byte	0xde7210bf	/* 14.999999 */
	.4byte	0x402dffff

#endif	/* __i386 */
#endif	/* __lint */

/*
 * FPU probe - check if we have any FP chip present by trying to do a reset.
 * If that succeeds, differentiate via cr0. Called from autoconf.
 */

#if defined(__lint)
 
/* Lint stub; the real fpu_probe is the assembly routine below. */
/*ARGSUSED*/
void
fpu_probe(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void fpu_probe(void)	-- amd64 version
	 *
	 * Probe for FPU hardware via fninit/fnstsw, require SSE and SSE2
	 * (mandatory on amd64), enable fxsave/#XM support in %cr4, capture
	 * the MXCSR_MASK from a scratch fxsave image, and finally set up
	 * %cr0 to reflect the result.
	 *
	 * Register usage: %rdx carries the %cr0 image that is installed at
	 * "done"; %rax is scratch.  Clobbers %rax, %rdx, flags, x87 state.
	 */
	ENTRY_NP(fpu_probe)
	pushq	%rbp
	movq	%rsp, %rbp
	clts				/* clear task switched bit in CR0 */
	fninit				/* initialize chip */
	fnstsw	%ax			/* get status */
	orb	%al, %al		/* status zero? 0 = chip present */
	jnz	no_fpu_hw

	/*
	 * Ignore the FPU if fpu_exists == 0
	 */
	cmpl	$0, fpu_exists(%rip)
	je	ignore_fpu

	/*
	 * we have a chip of some sort; use cr0 to differentiate
	 */
	movq	%cr0, %rdx		/* check for fpu present flag */
	testl	$CR0_ET, %edx
	jz	no_fpu_hw		/* z -> fpu not present */
	testl	$X86_SSE, x86_feature(%rip)
	je	no_fpu_hw		/* SSE is utterly required */
	testl	$X86_SSE2, x86_feature(%rip)
	je	no_fpu_hw		/* SSE2 too .. */
	movl	$__FP_SSE, fp_kind(%rip)

	/*
	 * Tell the processor what we're doing via %cr4
	 */
	movq	%cr4, %rax
	orq	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %rax
	movq	%rax, %cr4

	/*
	 * make other CPUs share the same cr4 settings
	 */
	orq	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value(%rip)

	/*
	 * extract the MXCSR_MASK field from our first fxsave; zero the
	 * field first so we can tell whether the chip wrote one.
	 * (%rsp is 16-byte aligned here: entry rsp%16 == 8, plus the
	 * pushq above; fxsave requires a 16-byte aligned operand.)
	 */
	subq	$FXSAVE_STATE_SIZE, %rsp
	movl	$0, FXSAVE_STATE_MXCSR_MASK(%rsp)
	fxsave	(%rsp)
	movl	FXSAVE_STATE_MXCSR_MASK(%rsp), %eax
	cmpl	$0, %eax
	je	1f			/* default mask value set in fpu.c */
	movl	%eax, sse_mxcsr_mask(%rip) /* override mask set here */
1:
	/*
	 * Reload %cr0 into %rdx for the final update (fix: this formerly
	 * loaded %rax, leaving the subsequent and/or to operate on a stale
	 * %rdx image).
	 */
	movq	%cr0, %rdx
	andq	$_BITNOT(CR0_TS|CR0_EM), %rdx	/* clear emulate math bit */
	orq	$_CONST(CR0_MP|CR0_NE), %rdx

	/*
	 * We have SSE and SSE2 so enable the extensions for
	 * non-temporal copies and stores.  (%rip-relative, consistent
	 * with every other data reference in this routine.)
	 */
	movl	$1, use_sse_pagecopy(%rip)
	movl	$1, use_sse_pagezero(%rip)
	movl	$1, use_sse_copy(%rip)

	jmp	done

	/*
	 * Do not use the FPU at all
	 */
ignore_fpu:
	movl	$1, fpu_ignored(%rip)

	/*
	 * No FPU hardware present.  Load %cr0 into %rdx here (fix: this
	 * label is reachable before %rdx was ever loaded, so "done" would
	 * otherwise write an uninitialized register into %cr0; compare the
	 * i386 version, which reloads %cr0 at this point).
	 */
no_fpu_hw:
	movq	%cr0, %rdx
	andq	$_BITNOT(CR0_MP), %rdx	/* clear math chip present */
	orq	$CR0_EM, %rdx		/* set emulate math bit */
	movl	$FP_NO, fp_kind(%rip)	/* signify that there is no FPU */
	movl	$0, fpu_exists(%rip)	/* no FPU present */
	/*
	 * Disable the XMM-related gorp too, in case the BIOS set them
	 */
	movq	%cr4, %rax
	andq	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), %rax
	movq	%rax, %cr4
	andq	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value(%rip)

done:
	movq	%rdx, %cr0		/* set machine status word */
	leave
	ret
	SET_SIZE(fpu_probe)

#elif defined(__i386)

	/*
	 * void fpu_probe(void)	-- i386 version
	 *
	 * Probe for an FPU, test for the Pentium FDIV erratum, classify the
	 * chip (287/387/SSE/SSE2), hot-patch SSE fast paths when available,
	 * and set up %cr0 accordingly.  %edx carries the %cr0 image written
	 * at "cont"; clobbers %eax, %edx, flags, x87 state.
	 */
	ENTRY_NP(fpu_probe)
	clts				/ clear task switched bit in CR0
	fninit				/ initialize chip
	fnstsw	%ax			/ get status
	orb	%al, %al		/ status zero? 0 = chip present
	jnz	no_fpu_hw		/ no, use emulator
/
/ If there is an FP, look for the Pentium FDIV problem even if we
/ do not plan to use it.  Set fpu_pentium_fdivbug if a bad FPU is
/ detected.  Subsequent code can report the result if desired.
/
/ If (num1/num2 > num1/num3) the FPU has the FDIV bug.
/
	fldl	num1
	fldl	num2
	fdivr	%st(1), %st		/ %st = num1/num2
	fxch	%st(1)			/ bring num1 back to %st
	fdivl	num3			/ %st = num1/num3
	fcompp				/ compare the two quotients, pop both
	fstsw	%ax			/ FP condition codes -> %ax
	sahf				/ ... -> EFLAGS
	jae	no_bug
	movl	$1, fpu_pentium_fdivbug
no_bug:
/
/ Repeat the earlier initialization sequence so that the FPU is left in
/ the expected state.
/
	fninit
	fnstsw	%ax
/
/ Ignore the FPU if fpu_exists == 0
/
	cmpl	$0, fpu_exists
	je	ignore_fpu
/
/ Ignore the FPU if it has the Pentium bug
/
	cmpl	$0, fpu_pentium_fdivbug
	jne	ignore_fpu
/
/ at this point we know we have a chip of some sort;
/ use cr0 to differentiate.
/
	movl    %cr0, %edx		/ check for 387 present flag
	testl	$CR0_ET, %edx		/ ...
	jz	is287			/ z -> 387 not present
	movl	$FP_387, fp_kind	/ we have a 387 or later chip
/
/ clear the "XMM supported" bits in %cr4 in case the BIOS set them
/ erroneously -- see 4965674
/
	movl	%cr4, %eax
	andl	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
	movl	%eax, %cr4
	andl	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value

	testl	$X86_SSE, x86_feature	/ can we do SSE?
	je	mathchip
/
/ aha .. we have an SSE-capable chip
/
/ - set fpsave_begin to fpxsave_begin
/ - hot patch performance critical code to use fxsave/fxrstor directly,
/   and hot patch membar_producer() to use sfence instead of lock
/ - tell the processor what we're doing via %cr4
/ - allow fully fledged #XM exceptions to be generated by SSE/SSE2
/   (the default mask set in fpinit() disables them)
/ - determine the mxcsr_mask so we can avoid setting reserved bits
/
	movl	$__FP_SSE, fp_kind
	movl	$fpxsave_begin, %eax
	movl	%eax, fpsave_begin
	call	patch_sse
	mov	%cr4, %eax
	orl	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
	mov	%eax, %cr4
/
/ make other CPUs share the same cr4 settings
/
	orl	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value
/
/ extract the MXCSR_MASK field from our first fxsave; the save area is
/ rounded up to a 16-byte boundary since fxsave requires it, and the
/ field is zeroed first so we can tell whether the chip wrote one.
/
	subl	$FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
	movl	%esp, %eax
	addl	$XMM_ALIGN, %eax
	andl	$_BITNOT(XMM_ALIGN-1), %eax	/* 16-byte alignment */
	movl	$0, FXSAVE_STATE_MXCSR_MASK(%eax)
	fxsave	(%eax)
	movl	FXSAVE_STATE_MXCSR_MASK(%eax), %eax
	addl	$FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
	cmpl	$0, %eax
	je	1f			/ default mask value set in fpu.c
	movl	%eax, sse_mxcsr_mask	/ override mask set here
1:	testl	$X86_SSE2, x86_feature	/ can we do SSE2?
	je	mathchip
/
/ aha .. we have an SSE2-capable chip
/
/ - enable pagezero and pagecopy using non-temporal instructions
/ - hot patch membar_consumer() to use lfence instead of lock
/
	movl	$1, use_sse_pagecopy	/ will now call hwblkpagecopy
	movl	$1, use_sse_pagezero	/ will now call hwblkclr
	movl	$1, use_sse_copy
	call	patch_sse2
	jmp	mathchip
/
/ No 387; we must have an 80287.
/
is287:
#if !defined(__GNUC_AS__)
	fsetpm				/ set the 80287 into protected mode
	movl	$FP_287, fp_kind	/ we have a 287 chip
#else
	movl	$FP_NO, fp_kind		/ GNU as cannot assemble fsetpm
#endif
/
/ We have either a 287, 387, 486 or P5.
/ Setup cr0 to reflect the FPU hw type.
/
mathchip:
	movl	%cr0, %edx
	andl	$_BITNOT(CR0_TS|CR0_EM), %edx	/* clear emulate math bit */
	orl	$_CONST(CR0_MP|CR0_NE), %edx
	jmp	cont

/ Do not use the FPU
ignore_fpu:
	movl	$1, fpu_ignored
/ No FP hw present.
no_fpu_hw:
	movl	%cr0, %edx
	andl	$_BITNOT(CR0_MP), %edx	/* clear math chip present */
	movl	$FP_NO, fp_kind		/ signify that there is no FPU
	movl	$0, fpu_exists		/ no FPU present
cont:
	movl	%edx, %cr0		/ set machine status word
	ret
	SET_SIZE(fpu_probe)

/*
 * HOT_PATCH(srcaddr, dstaddr, size)
 *
 * Copy `size' bytes of replacement code from srcaddr over the live kernel
 * text at dstaddr, one byte at a time, via hot_patch_kernel_text(dst,
 * byte, 1) (cdecl: args pushed right-to-left, caller pops 12 bytes).
 * Clobbers %eax, %ebx, %esi, %edi and flags; callers must preserve the
 * callee-saved ones around it (see patch_sse/patch_sse2).
 */
#define	HOT_PATCH(srcaddr, dstaddr, size)	\
	movl	$srcaddr, %esi;			\
	movl	$dstaddr, %edi;			\
	movl	$size, %ebx;			\
0:	pushl	$1;				\
	movzbl	(%esi), %eax;			\
	pushl	%eax;				\
	pushl	%edi;				\
	call	hot_patch_kernel_text;		\
	addl	$12, %esp;			\
	inc	%edi;				\
	inc	%esi;				\
	dec	%ebx;				\
	test	%ebx, %ebx;			\
	jne	0b

	/*
	 * To cope with processors that do not implement fxsave/fxrstor
	 * instructions, patch hot paths in the kernel to use them only
	 * when that feature has been detected.
	 */
	/*
	 * void patch_sse(void)
	 *
	 * Hot-patch three kernel text sites to their SSE forms, using the
	 * instruction templates that follow the function body.  Each
	 * replacement must be exactly the same length as the code it
	 * overwrites.  Saves/restores %ebx, %esi, %edi (clobbered by
	 * HOT_PATCH).
	 */
	ENTRY_NP(patch_sse)
	push	%ebp
	mov	%esp, %ebp
	push	%ebx
	push	%esi
	push	%edi
	/
	/	frstor (%eax); nop	-> fxrstor (%eax)
	/
	HOT_PATCH(_fxrstor_eax_insn, _patch_fxrstor_eax, 3)
	/
	/	nop; nop; nop		-> ldmxcsr (%ebx)
	/
	HOT_PATCH(_ldmxcsr_ebx_insn, _patch_ldmxcsr_ebx, 3)
	/
	/	lock; xorl $0, (%esp)	-> sfence; ret
	/
	HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
	pop	%edi
	pop	%esi
	pop	%ebx
	mov	%ebp, %esp
	pop	%ebp
	ret
/ Instruction templates copied by the HOT_PATCHes above:
_fxrstor_eax_insn:			/ see ndptrap_frstor()
	fxrstor	(%eax)
_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
	ldmxcsr	(%ebx)
_sfence_ret_insn:			/ see membar_producer()
	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
	ret
	SET_SIZE(patch_sse)

	/*
	 * Ditto, but this time for functions that depend upon SSE2 extensions
	 */
	/*
	 * void patch_sse2(void)
	 *
	 * Ditto, but this time for functions that depend upon SSE2
	 * extensions: patch membar_consumer() to use lfence instead of a
	 * locked operation.  Saves/restores %ebx, %esi, %edi (clobbered by
	 * HOT_PATCH).
	 */
	ENTRY_NP(patch_sse2)
	push	%ebp
	mov	%esp, %ebp
	push	%ebx
	push	%esi
	push	%edi
	/
	/	lock; xorl $0, (%esp)	-> lfence; ret
	/
	HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
	pop	%edi
	pop	%esi
	pop	%ebx
	mov	%ebp, %esp
	pop	%ebp
	ret
/ Instruction template copied by the HOT_PATCH above:
_lfence_ret_insn:			/ see membar_consumer()
	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
	ret
	SET_SIZE(patch_sse2)

#endif	/* __i386 */
#endif	/* __lint */

	
/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch; the
 * routine starts the floating point state save operation.
 * The completion of the save is forced by an fwait just before
 * we truly switch contexts..
 */

#if defined(__lint)

/* Lint stubs; the real context-switch save routines are below. */
/*ARGSUSED*/
void
fpnsave_begin(void *arg)
{}

/*ARGSUSED*/
void
fpxsave_begin(void *arg)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void fpxsave_begin(struct fpu_ctx *) -- amd64 version
	 *
	 * Start an fxsave of the lwp's FP state into the context structure,
	 * but only when the flags word is exactly FPU_EN (any other bit set,
	 * or FPU disabled, means there is nothing to save here).  The fwait
	 * that completes the save is issued later by the context-switch
	 * code.  %rdi = struct fpu_ctx *; clobbers %edx.
	 */
	ENTRY_NP(fpxsave_begin)
	movl	FPU_CTX_FPU_FLAGS(%rdi), %edx
	cmpl	$FPU_EN, %edx
	jne	1f
#if FPU_CTX_FPU_REGS != 0
	addq	FPU_CTX_FPU_REGS, %rdi
#endif
	fxsave	(%rdi)
	fnclex				/* clear pending x87 exceptions */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_begin)

#elif defined(__i386)

	/*
	 * void fpnsave_begin(struct fpu_ctx *) -- i386, x87-only version
	 *
	 * Start an fnsave of the lwp's FP state into the context structure,
	 * but only when the flags word is exactly FPU_EN.  The completing
	 * fwait is issued later by the context-switch code.
	 */
	ENTRY_NP(fpnsave_begin)
	mov	4(%esp), %eax		/ a struct fpu_ctx *
	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
	cmpl	$FPU_EN, %edx
	jne	1f
#if FPU_CTX_FPU_REGS != 0
	addl	FPU_CTX_FPU_REGS, %eax
#endif
	fnsave	(%eax)
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpnsave_begin)

	/*
	 * void fpxsave_begin(struct fpu_ctx *) -- i386, fxsave version
	 *
	 * As above but uses fxsave; installed in place of fpnsave_begin via
	 * the fpsave_begin pointer when SSE is detected (see fpu_probe).
	 */
	ENTRY_NP(fpxsave_begin)
	mov	4(%esp), %eax		/ a struct fpu_ctx *
	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
	cmpl	$FPU_EN, %edx
	jne	1f
#if FPU_CTX_FPU_REGS != 0
	addl	FPU_CTX_FPU_REGS, %eax
#endif
	fxsave	(%eax)
	fnclex				/ Clear pending x87 exceptions
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_begin)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

/* Lint stubs; the real save routines are below. */
/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void fpxsave(struct fxsave_state *) -- amd64 version
	 *
	 * Synchronously save the FP/SSE state to *%rdi, then leave the FPU
	 * initialized and disabled (CR0.TS set).  Unlike fnsave, fxsave
	 * does not reinitialize the chip, so fnclex/fninit are issued
	 * explicitly to emulate fnsave's side effects.
	 */
	ENTRY_NP(fpxsave)
	clts				/* clear TS bit in CR0 */
	fxsave	(%rdi)
	fnclex				/* clear pending x87 exceptions */
	fwait				/* wait for completion */
	fninit				/* emulate fnsave: init x87 tags */
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0		/* set TS bit in CR0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

#elif defined(__i386)

	/*
	 * void fpsave(struct fnsave_state *) -- i386, x87-only version
	 *
	 * Synchronously fnsave the FP state to the supplied buffer, then
	 * disable the FPU by setting CR0.TS (fnsave itself reinitializes
	 * the chip).
	 */
	ENTRY_NP(fpsave)
	clts				/ clear TS bit in CR0
	movl	4(%esp), %eax		/ load save address
	fnsave	(%eax)
	fwait				/ wait for completion
	movl	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
	ret
	SET_SIZE(fpsave)

	/*
	 * void fpxsave(struct fxsave_state *) -- i386, fxsave version
	 *
	 * As above but via fxsave; fnclex/fninit emulate fnsave's
	 * clear-and-reinitialize side effects, which fxsave lacks.
	 */
	ENTRY_NP(fpxsave)
	clts				/ clear TS bit in CR0
	movl	4(%esp), %eax		/ save address
	fxsave	(%eax)
	fnclex				/ Clear pending x87 exceptions
	fwait				/ wait for completion
	fninit				/ emulate fnsave: init x87 tag words
	mov	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
	ret
	SET_SIZE(fpxsave)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

/* Lint stubs; the real restore routines are below. */
/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void fpxrestore(struct fxsave_state *) -- amd64 version
	 *
	 * Re-enable the FPU (clear CR0.TS) and restore FP/SSE state
	 * from *%rdi.
	 */
	ENTRY_NP(fpxrestore)
	clts				/* clear TS bit in CR0 */
	fxrstor	(%rdi)
	ret
	SET_SIZE(fpxrestore)

#elif defined(__i386)

	/*
	 * void fprestore(struct fnsave_state *) -- i386, x87-only version
	 *
	 * Re-enable the FPU (clear CR0.TS) and frstor state from the
	 * supplied buffer.
	 */
	ENTRY_NP(fprestore)
	clts				/ clear TS bit in CR0
	movl	4(%esp), %eax		/ load restore address
	frstor	(%eax)
	ret
	SET_SIZE(fprestore)

	/*
	 * void fpxrestore(struct fxsave_state *) -- i386, fxrstor version
	 */
	ENTRY_NP(fpxrestore)
	clts				/ clear TS bit in CR0
	movl	4(%esp), %eax		/ load restore address
	fxrstor	(%eax)
	ret
	SET_SIZE(fpxrestore)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Disable the floating point unit.
 */

#if defined(__lint)

/* Lint stub; the real fpdisable is below. */
void
fpdisable(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void fpdisable(void) -- amd64 version
	 *
	 * Disable the FPU by setting CR0.TS; the next FP instruction will
	 * trap with #NM.  Clobbers %rax.
	 */
	ENTRY_NP(fpdisable)
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0		/* set TS bit in CR0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)

#elif defined(__i386)

	/*
	 * void fpdisable(void) -- i386 version
	 *
	 * Disable the FPU by setting CR0.TS; the next FP instruction will
	 * trap with #NM.  Clobbers %eax.
	 */
	ENTRY_NP(fpdisable)
	movl	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
	ret
	SET_SIZE(fpdisable)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Initialize the fpu hardware.
 */

#if defined(__lint)

/* Lint stub; the real fpinit is below. */
void
fpinit(void)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * void fpinit(void) -- amd64 version
	 *
	 * Enable the FPU (clear CR0.TS) and load the canned initial
	 * FP/SSE state from sse_initial (defined elsewhere, see fpu.c).
	 * Clobbers %rax.
	 */
	ENTRY_NP(fpinit)
	clts				/* clear TS bit in CR0 */
	leaq	sse_initial(%rip), %rax
	fxrstor	(%rax)			/* load clean initial state */
	ret
	SET_SIZE(fpinit)

#elif defined(__i386)

	/*
	 * void fpinit(void) -- i386 version
	 *
	 * Enable the FPU (clear CR0.TS) and load a canned initial state:
	 * sse_initial via fxrstor when fp_kind says SSE, otherwise
	 * x87_initial via frstor.  Clobbers %eax.
	 */
	ENTRY_NP(fpinit)
	clts				/ clear TS bit in CR0
	cmpl	$__FP_SSE, fp_kind
	je	1f

	fninit				/ initialize the chip
	movl	$x87_initial, %eax
	frstor	(%eax)			/ load clean initial state
	ret
1:
	movl	$sse_initial, %eax
	fxrstor	(%eax)			/ load clean initial state
	ret
	SET_SIZE(fpinit)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Clears FPU exception state.
 * Returns the FP status word.
 */

#if defined(__lint)

/* Lint stubs; the real exception-clearing routines are below. */
uint32_t
fperr_reset(void)
{
	return (0);
}

uint32_t
fpxerr_reset(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * uint32_t fperr_reset(void) -- amd64 version
	 *
	 * Return the x87 status word (zero-extended into %eax) and clear
	 * any pending x87 exceptions.  Leaves CR0.TS clear.
	 */
	ENTRY_NP(fperr_reset)
	xorl	%eax, %eax
	clts				/* clear TS bit in CR0 */
	fnstsw	%ax			/* get status */
	fnclex				/* clear processor exceptions */
	ret
	SET_SIZE(fperr_reset)

	/*
	 * uint32_t fpxerr_reset(void) -- amd64 version
	 *
	 * Return the MXCSR value and clear the SSE exception flag bits
	 * (SSE_MXCSR_EFLAGS) by rewriting MXCSR.  Leaves CR0.TS clear.
	 */
	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	clts				/* clear TS bit in CR0 */
	stmxcsr	(%rsp)			/* get status */
	movl	(%rsp), %eax		/* return value = old MXCSR */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* clear processor exceptions */
	leave
	ret
	SET_SIZE(fpxerr_reset)

#elif defined(__i386)

	/*
	 * uint32_t fperr_reset(void) -- i386 version
	 *
	 * Return the x87 status word (zero-extended into %eax) and clear
	 * any pending x87 exceptions.  Leaves CR0.TS clear.
	 */
	ENTRY_NP(fperr_reset)
	xorl	%eax, %eax
	clts				/ clear TS bit in CR0
	fnstsw	%ax			/ get status
	fnclex				/ clear processor exceptions
	ret
	SET_SIZE(fperr_reset)

	/*
	 * uint32_t fpxerr_reset(void) -- i386 version
	 *
	 * Return the MXCSR value and clear the SSE exception flag bits
	 * (SSE_MXCSR_EFLAGS) by rewriting MXCSR.  Leaves CR0.TS clear.
	 */
	ENTRY_NP(fpxerr_reset)
	clts				/ clear TS bit in CR0
	subl	$4, %esp		/ make some temporary space
	stmxcsr	(%esp)			/ get status
	movl	(%esp), %eax		/ return value = old MXCSR
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
	ldmxcsr	(%esp)			/ clear processor exceptions
	addl	$4, %esp
	ret
	SET_SIZE(fpxerr_reset)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

/* Lint stub; the real fpgetcwsw is below. */
uint32_t
fpgetcwsw(void)
{
	return (0);
}

#else   /* __lint */

#if defined(__amd64)

	/*
	 * uint32_t fpgetcwsw(void) -- amd64 version
	 *
	 * Return the x87 control word in the high 16 bits of %eax and the
	 * status word in the low 16 bits.  Leaves CR0.TS clear.
	 */
	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space	*/
	clts				/* clear TS bit in CR0		*/
	fnstsw	(%rsp)			/* store the status word	*/
	fnstcw	2(%rsp)			/* store the control word	*/
	movl	(%rsp), %eax		/* put both in %eax		*/
	leave
	ret
	SET_SIZE(fpgetcwsw)

#elif defined(__i386)

	/*
	 * uint32_t fpgetcwsw(void) -- i386 version
	 *
	 * Return the x87 control word in the high 16 bits of %eax and the
	 * status word in the low 16 bits.  Leaves CR0.TS clear.
	 */
	ENTRY_NP(fpgetcwsw)
	clts				/* clear TS bit in CR0		*/
	subl	$4, %esp		/* make some temporary space	*/
	fnstsw	(%esp)			/* store the status word	*/
	fnstcw	2(%esp)			/* store the control word	*/
	movl	(%esp), %eax		/* put both in %eax		*/
	addl	$4, %esp
	ret
	SET_SIZE(fpgetcwsw)

#endif	/* __i386 */
#endif  /* __lint */

/*
 * Returns the MXCSR register.
 */

#if defined(__lint)

/* Lint stub; the real fpgetmxcsr is below. */
uint32_t
fpgetmxcsr(void)
{
	return (0);
}

#else   /* __lint */

#if defined(__amd64)

	/*
	 * uint32_t fpgetmxcsr(void) -- amd64 version
	 *
	 * Return the current MXCSR register value.  Leaves CR0.TS clear.
	 */
	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space	*/
	clts				/* clear TS bit in CR0		*/
	stmxcsr	(%rsp)			/* get status			*/
	movl	(%rsp), %eax
	leave
	ret
	SET_SIZE(fpgetmxcsr)

#elif defined(__i386)

	/*
	 * uint32_t fpgetmxcsr(void) -- i386 version
	 *
	 * Return the current MXCSR register value.  Leaves CR0.TS clear.
	 */
	ENTRY_NP(fpgetmxcsr)
	clts				/* clear TS bit in CR0		*/
	subl	$4, %esp		/* make some temporary space	*/
	stmxcsr	(%esp)			/* get status			*/
	movl	(%esp), %eax
	addl	$4, %esp
	ret
	SET_SIZE(fpgetmxcsr)

#endif	/* __i386 */
#endif  /* __lint */