/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
 */

/*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
/*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T   */
/*        All Rights Reserved   */

/*      Copyright (c) 1987, 1988 Microsoft Corporation  */
/*        All Rights Reserved   */

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */

/*
 * Low-level x87/SSE/AVX state handling routines, written in assembly
 * for i386 and amd64.  Each routine has a C stub under __lint that
 * only documents the C-visible prototype.
 *
 * NOTE(review): the header names on the #include directives below are
 * missing in this copy (only the bare directives survive).  The macros
 * this file uses but does not define (ENTRY_NP, SET_SIZE, CLTS, STTS,
 * FXSAVEQ, FXRSTORQ and the assym.h offsets) must come from those
 * headers; restore the names from the upstream file before building.
 */
#include
#include
#include
#include
#include

#if defined(__lint)
#include
#include
#else
#include "assym.h"
#endif

#if defined(__lint)

uint_t
fpu_initial_probe(void)
{ return (0); }

#else	/* __lint */

	/*
	 * Returns zero if x87 "chip" is present(!)
	 *
	 * The routine issues fninit, stores the x87 status word into %ax
	 * and returns its low byte zero-extended in %eax.  A non-zero
	 * result therefore means the status word did not read back as
	 * zero after the init.
	 * CLTS is issued first, presumably so the x87 instructions do not
	 * trap on CR0.TS (the macro body is not visible in this file).
	 */
	ENTRY_NP(fpu_initial_probe)
	CLTS
	fninit
	fnstsw	%ax
	movzbl	%al, %eax		/* return only the low status byte */
	ret
	SET_SIZE(fpu_initial_probe)

#endif	/* __lint */

#if defined(__lint)

/*ARGSUSED*/
void
fxsave_insn(struct fxsave_state *fx)
{}

#else	/* __lint */

/*
 * fxsave_insn(fx): execute a bare fxsave into the buffer at fx.
 * Unlike fpxsave() this routine contains no CLTS/STTS and no fninit;
 * it only stores the state and returns.
 */

#if defined(__amd64)

	ENTRY_NP(fxsave_insn)
	FXSAVEQ	((%rdi))		/* %rdi = fx (first argument) */
	ret
	SET_SIZE(fxsave_insn)

#elif defined(__i386)

	ENTRY_NP(fxsave_insn)
	movl	4(%esp), %eax		/* fx: first stack argument */
	fxsave	(%eax)
	ret
	SET_SIZE(fxsave_insn)

#endif

#endif	/* __lint */

#if defined(__i386)

/*
 * If (num1/num2 > num1/num3) the FPU has the FDIV bug.
*/

#if defined(__lint)

int
fpu_probe_pentium_fdivbug(void)
{ return (0); }

#else	/* __lint */

	/*
	 * Probe for the Pentium FDIV erratum (i386 only).
	 *
	 * Two divisions are performed on the constants .num1/.num2/.num3
	 * stored after the code, the two results are compared with fcompp
	 * (which also pops both of them), and the x87 condition codes are
	 * copied into EFLAGS through %ax and sahf.
	 *
	 * Returns 1 in %eax when the "jae" below is not taken, 0 when it is.
	 */
	ENTRY_NP(fpu_probe_pentium_fdivbug)
	fldl	.num1
	fldl	.num2
	fdivr	%st(1), %st
	fxch	%st(1)
	fdivl	.num3
	fcompp
	fstsw	%ax
	sahf				/* x87 condition codes -> EFLAGS */
	jae	0f
	movl	$1, %eax		/* report the bug */
	ret

0:	xorl	%eax, %eax		/* no bug detected */
	ret

	/*
	 * Test operands: each pair of .4byte words is one 64-bit double,
	 * low word first.
	 */
	.align	4
.num1:	.4byte	0xbce4217d	/* 4.999999 */
	.4byte	0x4013ffff
.num2:	.4byte	0x0		/* 15.0 */
	.4byte	0x402e0000
.num3:	.4byte	0xde7210bf	/* 14.999999 */
	.4byte	0x402dffff
	SET_SIZE(fpu_probe_pentium_fdivbug)

#endif	/* __lint */

/*
 * To cope with processors that do not implement fxsave/fxrstor
 * instructions, patch hot paths in the kernel to use them only
 * when that feature has been detected.
 */

#if defined(__lint)

void
patch_sse(void)
{}

void
patch_sse2(void)
{}

void
patch_xsave(void)
{}

#else	/* __lint */

	/*
	 * patch_sse(): rewrite two kernel patch points with the instruction
	 * templates stored after the "ret" below.  Each _HOT_PATCH names a
	 * template label, a patch-point label defined elsewhere, and a byte
	 * count; the macro itself is not defined in this file, so presumably
	 * it copies that many bytes from the template to the patch point.
	 *
	 * The templates are placed after the ret, so they are never reached
	 * by falling through from this routine.
	 *
	 * NOTE(review): _ldmxcsr_ebx_insn is defined below but no _HOT_PATCH
	 * in this routine refers to it -- confirm whether it is still needed.
	 */
	ENTRY_NP(patch_sse)
	_HOT_PATCH_PROLOG
	/
	/	frstor (%ebx); nop	-> fxrstor (%ebx)
	/
	_HOT_PATCH(_fxrstor_ebx_insn, _patch_fxrstor_ebx, 3)
	/
	/	lock; xorl $0, (%esp)	-> sfence; ret
	/
	_HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
	_HOT_PATCH_EPILOG
	ret
_fxrstor_ebx_insn:			/ see ndptrap_frstor()
	fxrstor	(%ebx)
_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
	ldmxcsr	(%ebx)
_sfence_ret_insn:			/ see membar_producer()
	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
	ret
	SET_SIZE(patch_sse)

	/*
	 * patch_sse2(): same scheme as patch_sse(), for one patch point:
	 * the 4-byte template at _lfence_ret_insn (hand-encoded lfence
	 * followed by ret) is applied at _patch_lfence_ret.
	 */
	ENTRY_NP(patch_sse2)
	_HOT_PATCH_PROLOG
	/
	/	lock; xorl $0, (%esp) -> lfence; ret
	/
	_HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
	_HOT_PATCH_EPILOG
	ret
_lfence_ret_insn:			/ see membar_consumer()
	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
	ret
	SET_SIZE(patch_sse2)

	/*
	 * Patch lazy fp restore instructions in the trap handler
	 * to use xrstor instead of frstor
	 *
	 * The 3-byte template is emitted with .byte (the mnemonic is kept
	 * only as a '#' note), and is applied at _patch_xrstor_ebx.
	 */
	ENTRY_NP(patch_xsave)
	_HOT_PATCH_PROLOG
	/
	/	frstor (%ebx); nop	-> xrstor (%ebx)
	/
	_HOT_PATCH(_xrstor_ebx_insn, _patch_xrstor_ebx, 3)
	_HOT_PATCH_EPILOG
	ret
_xrstor_ebx_insn:			/ see ndptrap_frstor()
	#xrstor (%ebx)
	.byte	0x0f, 0xae, 0x2b
	SET_SIZE(patch_xsave)

#endif	/* __lint */
#endif	/* __i386 */

#if defined(__amd64)
#if defined(__lint)

void
patch_xsave(void)
{}

#else	/* __lint
*/

	/*
	 * Patch lazy fp restore instructions in the trap handler
	 * to use xrstor instead of fxrstorq
	 *
	 * The 4-byte template at _xrstor_rbx_insn is copied to
	 * _patch_xrstorq_rbx one byte at a time: each loop iteration calls
	 * hot_patch_kernel_text(address, byte, 1).  The loop state lives in
	 * %rbx (destination), %rbp (source) and %r15 (bytes remaining),
	 * which are saved on entry and restored before returning.
	 */
	ENTRY_NP(patch_xsave)
	pushq	%rbx
	pushq	%rbp
	pushq	%r15
	/
	/	FXRSTORQ (%rbx);	-> xrstor (%rbx)
	/ 	hot_patch(_xrstor_rbx_insn, _patch_xrstorq_rbx, 4)
	/
	leaq	_patch_xrstorq_rbx(%rip), %rbx
	leaq	_xrstor_rbx_insn(%rip), %rbp
	movq	$4, %r15
1:
	movq	%rbx, %rdi			/* patch address */
	movzbq	(%rbp), %rsi			/* instruction byte */
	movq	$1, %rdx			/* count */
	call	hot_patch_kernel_text
	addq	$1, %rbx
	addq	$1, %rbp
	subq	$1, %r15
	jnz	1b
	popq	%r15
	popq	%rbp
	popq	%rbx
	ret

	/* Template bytes only; placed after the ret, never fallen into. */
_xrstor_rbx_insn:			/ see ndptrap_frstor()
	#rex.W=1 (.byte 0x48)
	#xrstor (%rbx)
	.byte	0x48, 0x0f, 0xae, 0x2b
	SET_SIZE(patch_xsave)

#endif	/* __lint */
#endif	/* __amd64 */

/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch.
 *
 * All variants take a pointer to the fpu context (see the
 * "a struct fpu_ctx" notes in the i386 code) and share one shape:
 *   - if the flags word is not exactly FPU_EN, return without saving;
 *   - otherwise set the flags to FPU_VALID|FPU_EN, store the register
 *     state at FPU_CTX_FPU_REGS, and finish with STTS.
 */

#if defined(__lint)

/*ARGSUSED*/
void
xsave_ctxt(void *arg)
{}

/*ARGSUSED*/
void
fpxsave_ctxt(void *arg)
{}

/*ARGSUSED*/
void
fpnsave_ctxt(void *arg)
{}

#else	/* __lint */

#if defined(__amd64)

	/* fpxsave_ctxt(%rdi = fpu context): fxsave-based save. */
	ENTRY_NP(fpxsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f

	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	FXSAVEQ	(FPU_CTX_FPU_REGS(%rdi))

	/*
	 * On certain AMD processors, the "exception pointers" i.e. the last
	 * instruction pointer, last data pointer, and last opcode
	 * are saved by the fxsave instruction ONLY if the exception summary
	 * bit is set.
	 *
	 * To ensure that we don't leak these values into the next context
	 * on the cpu, we could just issue an fninit here, but that's
	 * rather slow and so we issue an instruction sequence that
	 * clears them more quickly, if a little obscurely.
	 */
	/*
	 * NOTE(review): the test below indexes from the context base
	 * (%rdi), not from FPU_CTX_FPU_REGS(%rdi); this matches the saved
	 * image only if FPU_CTX_FPU_REGS is 0 -- confirm against assym.h.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)

	/*
	 * xsave_ctxt(%rdi = fpu context): xsave-based save.  The 64-bit
	 * mask at FPU_CTX_FPU_XSAVE_MASK is loaded into %edx:%eax and the
	 * xsave itself is emitted as raw bytes.
	 */
	ENTRY_NP(xsave_ctxt)
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%rdi)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%rdi)
	/*
	 * Setup xsave flags in EDX:EAX
	 */
	movl	FPU_CTX_FPU_XSAVE_MASK(%rdi), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%rdi), %edx
	leaq	FPU_CTX_FPU_REGS(%rdi), %rsi
	#xsave	(%rsi)
	.byte	0x0f, 0xae, 0x26

	/*
	 * (see notes above about "exception pointers")
	 * TODO: does it apply to any machine that uses xsave?
	 */
	/*
	 * NOTE(review): the state was stored through %rsi but the test
	 * below indexes from %rdi (the context base); the i386 variant
	 * tests through the save-area pointer instead.  Equivalent only
	 * if FPU_CTX_FPU_REGS is 0 -- confirm.
	 */
	btw	$7, FXSAVE_STATE_FSW(%rdi)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const(%rip)
			/* dummy load changes all exception pointers */
	STTS(%rsi)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)

#elif defined(__i386)

	/* fpnsave_ctxt(ctx): fnsave-based save; ctx is at 4(%esp). */
	ENTRY_NP(fpnsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fnsave	FPU_CTX_FPU_REGS(%eax)
			/* (fnsave also reinitializes x87 state) */
	STTS(%edx)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpnsave_ctxt)

	/* fpxsave_ctxt(ctx): fxsave-based save; ctx is at 4(%esp). */
	ENTRY_NP(fpxsave_ctxt)
	movl	4(%esp), %eax		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%eax)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%eax)
	fxsave	FPU_CTX_FPU_REGS(%eax)
			/* (see notes above about "exception pointers") */
	/*
	 * NOTE(review): the test indexes from the context base (%eax),
	 * not from FPU_CTX_FPU_REGS(%eax) -- see the amd64 variant.
	 */
	btw	$7, FXSAVE_STATE_FSW(%eax)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const
			/* dummy load changes all exception pointers */
	STTS(%edx)	/* trap on next fpu touch */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_ctxt)

	/*
	 * xsave_ctxt(ctx): xsave-based save; ctx is at 4(%esp).
	 * %ecx is first the context pointer and is then advanced to the
	 * save area, so the ES-bit test below goes through the save-area
	 * pointer.
	 */
	ENTRY_NP(xsave_ctxt)
	movl	4(%esp), %ecx		/* a struct fpu_ctx */
	cmpl	$FPU_EN, FPU_CTX_FPU_FLAGS(%ecx)
	jne	1f
	movl	$_CONST(FPU_VALID|FPU_EN), FPU_CTX_FPU_FLAGS(%ecx)
	movl	FPU_CTX_FPU_XSAVE_MASK(%ecx), %eax
	movl	FPU_CTX_FPU_XSAVE_MASK+4(%ecx), %edx
	leal	FPU_CTX_FPU_REGS(%ecx), %ecx
	#xsave	(%ecx)
	.byte	0x0f, 0xae, 0x21

	/*
	 * (see notes above about "exception pointers")
	 * TODO: does it apply to any machine that uses xsave?
	 */
	btw	$7, FXSAVE_STATE_FSW(%ecx)	/* Test saved ES bit */
	jnc	0f				/* jump if ES = 0 */
	fnclex		/* clear pending x87 exceptions */
0:	ffree	%st(7)	/* clear tag bit to remove possible stack overflow */
	fildl	.fpzero_const
			/* dummy load changes all exception pointers */
	STTS(%edx)	/* trap on next fpu touch */
1:	ret
	SET_SIZE(xsave_ctxt)

#endif	/* __i386 */

	/* Zero operand for the dummy fildl in the routines above. */
	.align	8
.fpzero_const:
	.4byte	0x0
	.4byte	0x0

#endif	/* __lint */


#if defined(__lint)

/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}

/*ARGSUSED*/
void
xsave(struct xsave_state *f, uint64_t m)
{}

#else	/* __lint */

/*
 * Unconditional save routines: store the current FPU state into the
 * caller-supplied buffer f and finish with STTS.  Each begins with CLTS.
 * The fxsave and xsave variants follow the store with fninit; the
 * fnsave variant does not.  xsave() additionally takes the 64-bit mask
 * m, which is passed to the instruction in %edx:%eax.
 */

#if defined(__amd64)

	ENTRY_NP(fpxsave)
	CLTS
	FXSAVEQ	((%rdi))
	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

	ENTRY_NP(xsave)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high 32 bits of m */
	#xsave	(%rdi)
	.byte	0x0f, 0xae, 0x27

	fninit				/* clear exceptions, init x87 tags */
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)

#elif defined(__i386)

	ENTRY_NP(fpsave)
	CLTS
	movl	4(%esp), %eax
	fnsave	(%eax)
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpsave)

	ENTRY_NP(fpxsave)
	CLTS
	movl	4(%esp), %eax
	fxsave	(%eax)
	fninit				/* clear exceptions, init x87 tags */
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

	ENTRY_NP(xsave)
	CLTS
	movl	4(%esp), %ecx		/* f */
	movl	8(%esp), %eax		/* low 32 bits of m */
	movl	12(%esp), %edx		/* high 32 bits of m */
	#xsave	(%ecx)
	.byte	0x0f, 0xae, 0x21

	fninit				/* clear exceptions, init x87 tags */
	STTS(%eax)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(xsave)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}

/*ARGSUSED*/
void
xrestore(struct xsave_state *f, uint64_t m)
{}

#else	/* __lint */

/*
 * Restore routines: issue CLTS and then load the FPU state from the
 * caller-supplied buffer f.  No STTS follows the load.  xrestore()
 * passes the 64-bit mask m in %edx:%eax and emits xrstor as raw bytes.
 */

#if defined(__amd64)

	ENTRY_NP(fpxrestore)
	CLTS
	FXRSTORQ	((%rdi))
	ret
	SET_SIZE(fpxrestore)

	ENTRY_NP(xrestore)
	CLTS
	movl	%esi, %eax		/* bv mask */
	movq	%rsi, %rdx
	shrq	$32, %rdx		/* %edx = high 32 bits of m */
	#xrstor	(%rdi)
	.byte	0x0f, 0xae, 0x2f
	ret
	SET_SIZE(xrestore)

#elif defined(__i386)

	ENTRY_NP(fprestore)
	CLTS
	movl	4(%esp), %eax
	frstor	(%eax)
	ret
	SET_SIZE(fprestore)

	ENTRY_NP(fpxrestore)
	CLTS
	movl	4(%esp), %eax
	fxrstor	(%eax)
	ret
	SET_SIZE(fpxrestore)

	ENTRY_NP(xrestore)
	CLTS
	movl	4(%esp), %ecx		/* f */
	movl	8(%esp), %eax		/* low 32 bits of m */
	movl	12(%esp), %edx		/* high 32 bits of m */
	#xrstor	(%ecx)
	.byte	0x0f, 0xae, 0x29
	ret
	SET_SIZE(xrestore)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Disable the floating point unit.
 */

#if defined(__lint)

void
fpdisable(void)
{}

#else	/* __lint */

/* The register handed to STTS is a scratch register for the macro. */

#if defined(__amd64)

	ENTRY_NP(fpdisable)
	STTS(%rdi)			/* set TS bit in %cr0 (disable FPU) */
	ret
	SET_SIZE(fpdisable)

#elif defined(__i386)

	ENTRY_NP(fpdisable)
	STTS(%eax)
	ret
	SET_SIZE(fpdisable)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Initialize the fpu hardware.
*/

#if defined(__lint)

void
fpinit(void)
{}

#else	/* __lint */

/*
 * fpinit(): after CLTS, load a clean register image chosen by the
 * global fp_save_mech:
 *   FP_XSAVE  -> xrstor from avx_initial
 *   FP_FXSAVE -> fxrstor from sse_initial (on amd64 this is the
 *                fall-through case for anything that is not FP_XSAVE)
 *   otherwise -> (i386 only) fninit, then frstor from x87_initial
 *
 * For the xrstor case the component mask in %edx:%eax is built as
 * XFEATURE_LEGACY_FP | XFEATURE_SSE, plus XFEATURE_AVX only when the
 * "bt" of X86FSET_AVX in x86_featureset sets the carry flag (cmovae
 * replaces the AVX bit with zero when carry is clear).  %edx is zero.
 */

#if defined(__amd64)

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_XSAVE, fp_save_mech
	je	1f

	/* fxsave */
	leaq	sse_initial(%rip), %rax
	FXRSTORQ	((%rax))		/* load clean initial state */
	ret

1:	/* xsave */
	leaq	avx_initial(%rip), %rcx
	xorl	%edx, %edx
	movl	$XFEATURE_AVX, %eax
	bt	$X86FSET_AVX, x86_featureset
	cmovael	%edx, %eax
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	/* xrstor (%rcx) */
	.byte	0x0f, 0xae, 0x29		/* load clean initial state */
	ret
	SET_SIZE(fpinit)

#elif defined(__i386)

	ENTRY_NP(fpinit)
	CLTS
	cmpl	$FP_FXSAVE, fp_save_mech
	je	1f
	cmpl	$FP_XSAVE, fp_save_mech
	je	2f

	/* fnsave */
	fninit
	movl	$x87_initial, %eax
	frstor	(%eax)			/* load clean initial state */
	ret

1:	/* fxsave */
	movl	$sse_initial, %eax
	fxrstor	(%eax)			/* load clean initial state */
	ret

2:	/* xsave */
	movl	$avx_initial, %ecx
	xorl	%edx, %edx
	movl	$XFEATURE_AVX, %eax
	bt	$X86FSET_AVX, x86_featureset
	cmovael	%edx, %eax
	orl	$(XFEATURE_LEGACY_FP | XFEATURE_SSE), %eax
	/* xrstor (%ecx) */
	.byte	0x0f, 0xae, 0x29		/* load clean initial state */
	ret
	SET_SIZE(fpinit)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Clears FPU exception state.
 * Returns the FP status word.
*/

#if defined(__lint)

uint32_t
fperr_reset(void)
{ return (0); }

uint32_t
fpxerr_reset(void)
{ return (0); }

#else	/* __lint */

/*
 * fperr_reset():  returns the x87 status word (zero-extended, since
 *                 %eax is cleared before fnstsw %ax) as it was before
 *                 the fnclex that follows.
 * fpxerr_reset(): returns the MXCSR value read by stmxcsr, then writes
 *                 MXCSR back with the SSE_MXCSR_EFLAGS bits cleared.
 *                 NOTE(review): the value returned here is MXCSR, not
 *                 the x87 status word.
 * Both issue CLTS and neither issues STTS.
 */

#if defined(__amd64)

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax
	fnstsw	%ax
	fnclex
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax		/* return value: MXCSR before clearing */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* clear processor exceptions */
	leave
	ret
	SET_SIZE(fpxerr_reset)

#elif defined(__i386)

	ENTRY_NP(fperr_reset)
	CLTS
	xorl	%eax, %eax
	fnstsw	%ax
	fnclex
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	CLTS
	subl	$4, %esp		/* make some temporary space */
	stmxcsr	(%esp)
	movl	(%esp), %eax		/* return value: MXCSR before clearing */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
	ldmxcsr	(%esp)			/* clear processor exceptions */
	addl	$4, %esp
	ret
	SET_SIZE(fpxerr_reset)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

uint32_t
fpgetcwsw(void)
{
	return (0);
}

#else   /* __lint */

/*
 * fpgetcwsw(): returns the x87 control and status words packed into
 * one 32-bit value.  The status word is stored at offset 0 and the
 * control word at offset 2 of a stack temporary, which is then read
 * back as a single 32-bit load: status word in the low 16 bits,
 * control word in the high 16 bits.
 */

#if defined(__amd64)

	ENTRY_NP(fpgetcwsw)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space	*/
	CLTS
	fnstsw	(%rsp)			/* store the status word	*/
	fnstcw	2(%rsp)			/* store the control word	*/
	movl	(%rsp), %eax		/* put both in %eax		*/
	leave
	ret
	SET_SIZE(fpgetcwsw)

#elif defined(__i386)

	ENTRY_NP(fpgetcwsw)
	CLTS
	subl	$4, %esp		/* make some temporary space	*/
	fnstsw	(%esp)			/* store the status word	*/
	fnstcw	2(%esp)			/* store the control word	*/
	movl	(%esp), %eax		/* put both in %eax		*/
	addl	$4, %esp
	ret
	SET_SIZE(fpgetcwsw)

#endif	/* __i386 */
#endif  /* __lint */

/*
 * Returns the MXCSR register.
*/

#if defined(__lint)

uint32_t
fpgetmxcsr(void)
{
	return (0);
}

#else   /* __lint */

/*
 * fpgetmxcsr(): stmxcsr can only store to memory, so the value is
 * written to a stack temporary and loaded into %eax from there.
 * CLTS is issued before the store; no STTS follows.
 */

#if defined(__amd64)

	ENTRY_NP(fpgetmxcsr)
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	CLTS
	stmxcsr	(%rsp)
	movl	(%rsp), %eax		/* return value: current MXCSR */
	leave				/* drops the temporary, restores %rbp */
	ret
	SET_SIZE(fpgetmxcsr)

#elif defined(__i386)

	ENTRY_NP(fpgetmxcsr)
	CLTS
	subl	$4, %esp		/* make some temporary space */
	stmxcsr	(%esp)
	movl	(%esp), %eax		/* return value: current MXCSR */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetmxcsr)

#endif	/* __i386 */
#endif  /* __lint */