/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License, Version 1.0 only
 * (the "License").  You may not use this file except in compliance
 * with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1990, 1991 UNIX System Laboratories, Inc.	*/
/*	Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T	*/
/*	  All Rights Reserved	*/

/*	Copyright (c) 1987, 1988 Microsoft Corporation	*/
/*	  All Rights Reserved	*/

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * NOTE(review): the header names on the bare #include lines below are
 * missing from this copy of the file (only "assym.h" survived).  They
 * must be restored from the upstream source before this file can be
 * preprocessed; they are left untouched here rather than guessed.
 */
#include
#include
#include
#include

#if defined(__lint)
#include
#include
#else
#include "assym.h"
#endif

#if defined(__lint)

/*
 * C definitions seen only when __lint is defined.  They carry the same
 * names and initial values as the assembler data definitions in the
 * #else branch.
 */
int fpu_exists = 1;
int fp_kind = FP_387;
int fpu_ignored = 0;

int use_sse_pagecopy = 0;
int use_sse_pagezero = 0;
int use_sse_copy = 0;

#if defined(__i386)
int fpu_pentium_fdivbug = 0;
#endif

#else /* __lint */

	/*
	 * If fpu_exists is non-zero, fpu_probe will attempt to use any
	 * hardware FPU (subject to other constraints, see below).  If
	 * fpu_exists is zero, fpu_probe will report that there is no
	 * FPU even if there is one.
	 *
	 * fpu_probe also clears fpu_exists itself when it decides that
	 * no usable FPU is present.
	 */
	DGDEF3(fpu_exists, 4, 4)
	.long	1

	/*
	 * fp_kind starts out as FP_387 and is overwritten by fpu_probe
	 * with the kind of FPU it settles on.
	 */
	DGDEF3(fp_kind, 4, 4)
	.long	FP_387		/* FP_NO, FP_287, FP_387, etc. 
*/

	/*
	 * The variable fpu_ignored is provided to allow other code to
	 * determine whether emulation is being done because there is
	 * no FPU or because of an override requested via /etc/system.
	 */
	DGDEF3(fpu_ignored, 4, 4)
	.long	0

	/*
	 * Used by ppcopy, ppzero, and xcopyin to determine whether or not
	 * to use the SSE-based routines
	 */
	DGDEF3(use_sse_pagecopy, 4, 4)
	.long	0

	DGDEF3(use_sse_pagezero, 4, 4)
	.long	0

	DGDEF3(use_sse_copy, 4, 4)
	.long	0

#if defined(__i386)

	/*
	 * The variable fpu_pentium_fdivbug is provided to allow other code to
	 * determine whether the system contains a Pentium with the FDIV
	 * problem.
	 */
	DGDEF3(fpu_pentium_fdivbug, 4, 4)
	.long	0

	/*
	 * The following constants are used for detecting the Pentium
	 * divide bug.  Each is a 64-bit value stored as two 32-bit
	 * halves, low half first.
	 */
	.align	4
num1:	.4byte	0xbce4217d	/* 4.999999 */
	.4byte	0x4013ffff
num2:	.4byte	0x0		/* 15.0 */
	.4byte	0x402e0000
num3:	.4byte	0xde7210bf	/* 14.999999 */
	.4byte	0x402dffff
#endif	/* __i386 */
#endif	/* __lint */

/*
 * FPU probe - check if we have any FP chip present by trying to do a reset.
 * If that succeeds, differentiate via cr0. Called from autoconf.
 *
 * void fpu_probe(void)
 *
 * Results are reported through the global variables defined above:
 * fp_kind, fpu_exists, fpu_ignored, the use_sse_* flags and (i386 only)
 * fpu_pentium_fdivbug.  The routine also updates %cr0, %cr4 and the
 * cr4_value variable, and may store the hardware MXCSR mask into
 * sse_mxcsr_mask.
 */

#if defined(__lint)

/*ARGSUSED*/
void
fpu_probe(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpu_probe)
	pushq	%rbp
	movq	%rsp, %rbp
	clts				/* clear task switched bit in CR0 */
	fninit				/* initialize chip */
	fnstsw	%ax			/* get status */
	orb	%al, %al		/* status zero? 0 = chip present */
	jnz	no_fpu_hw

	/*
	 * Ignore the FPU if fpu_exists == 0
	 */
	cmpl	$0, fpu_exists(%rip)
	je	ignore_fpu

	/*
	 * we have a chip of some sort; use cr0 to differentiate
	 */
	movq	%cr0, %rdx		/* check for fpu present flag */
	testl	$CR0_ET, %edx
	jz	no_fpu_hw		/* z -> fpu not present */

	testl	$X86_SSE, x86_feature(%rip)
	je	no_fpu_hw		/* SSE is utterly required */

	testl	$X86_SSE2, x86_feature(%rip)
	je	no_fpu_hw		/* SSE2 too .. */

	movl	$__FP_SSE, fp_kind(%rip)

	/*
	 * Tell the processor what we're doing via %cr4
	 */
	movq	%cr4, %rax
	orq	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %rax
	movq	%rax, %cr4

	/*
	 * make other CPUs share the same cr4 settings
	 */
	orq	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value(%rip)

	/*
	 * extract the MXCSR_MASK field from our first fxsave
	 *
	 * The save area is carved out of the stack; it is released by
	 * the leave instruction at "done".  The field is zeroed first so
	 * that a zero read back means the hardware did not fill it in.
	 */
	subq	$FXSAVE_STATE_SIZE, %rsp
	movl	$0, FXSAVE_STATE_MXCSR_MASK(%rsp)
	fxsave	(%rsp)
	movl	FXSAVE_STATE_MXCSR_MASK(%rsp), %eax
	cmpl	$0, %eax
	je	1f			/* default mask value set in fpu.c */
	movl	%eax, sse_mxcsr_mask(%rip) /* override mask set here */
1:
	/*
	 * Build the new %cr0 value in %rdx; it is written at "done".
	 * (This used to load %cr0 into %rax, which was never used.)
	 */
	movq	%cr0, %rdx
	andq	$_BITNOT(CR0_TS|CR0_EM), %rdx	/* clear emulate math bit */
	orq	$_CONST(CR0_MP|CR0_NE), %rdx

	/*
	 * We have SSE and SSE2 so enable the extensions for
	 * non-temporal copies and stores.
	 */
	movl	$1, use_sse_pagecopy(%rip)
	movl	$1, use_sse_pagezero(%rip)
	movl	$1, use_sse_copy(%rip)

	jmp	done

	/*
	 * Do not use the FPU at all
	 */
ignore_fpu:
	movl	$1, fpu_ignored(%rip)

	/*
	 * No FPU hardware present
	 *
	 * This label is reached both before and after %cr0 has been
	 * read into %rdx above, so (re)load it here; otherwise an
	 * uninitialized %rdx would be written to %cr0 at "done".
	 */
no_fpu_hw:
	movq	%cr0, %rdx
	andq	$_BITNOT(CR0_MP), %rdx	/* clear math chip present */
	orq	$CR0_EM, %rdx		/* set emulate math bit */
	movl	$FP_NO, fp_kind(%rip)	/* signify that there is no FPU */
	movl	$0, fpu_exists(%rip)	/* no FPU present */

	/*
	 * Disable the XMM-related gorp too, in case the BIOS set them
	 */
	movq	%cr4, %rax
	andq	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), %rax
	movq	%rax, %cr4
	andq	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value(%rip)

done:
	movq	%rdx, %cr0		/* set machine status word */
	leave
	ret
	SET_SIZE(fpu_probe)

#elif defined(__i386)

	ENTRY_NP(fpu_probe)
	clts				/ clear task switched bit in CR0
	fninit				/ initialize chip
	fnstsw	%ax			/ get status
	orb	%al, %al		/ status zero? 0 = chip present
	jnz	no_fpu_hw		/ no, use emulator
/
/ If there is an FP, look for the Pentium FDIV problem even if we
/ do not plan to use it.  Set fpu_pentium_fdivbug if a bad FPU is
/ detected.  Subsequent code can report the result if desired.
/
/ If (num1/num2 > num1/num3) the FPU has the FDIV bug.
/
	fldl	num1
	fldl	num2
	fdivr	%st(1), %st
	fxch	%st(1)
	fdivl	num3
	fcompp
	fstsw	%ax
	sahf
	jae	no_bug
	movl	$1, fpu_pentium_fdivbug
no_bug:
/
/ Repeat the earlier initialization sequence so that the FPU is left in
/ the expected state.
/
	fninit
	fnstsw	%ax
/
/ Ignore the FPU if fpu_exists == 0
/
	cmpl	$0, fpu_exists
	je	ignore_fpu
/
/ Ignore the FPU if it has the Pentium bug
/
	cmpl	$0, fpu_pentium_fdivbug
	jne	ignore_fpu
/
/ at this point we know we have a chip of some sort;
/ use cr0 to differentiate.
/
	movl	%cr0, %edx		/ check for 387 present flag
	testl	$CR0_ET, %edx		/ ...
	jz	is287			/ z -> 387 not present
	movl	$FP_387, fp_kind	/ we have a 387 or later chip
/
/ clear the "XMM supported" bits in %cr4 in case the BIOS set them
/ erroneously -- see 4965674
/
	movl	%cr4, %eax
	andl	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
	movl	%eax, %cr4
	andl	$_BITNOT(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value

	testl	$X86_SSE, x86_feature	/ can we do SSE?
	je	mathchip
/
/ aha .. we have an SSE-capable chip
/
/ - set fpsave_begin to fpxsave_begin
/ - hot patch performance critical code to use fxsave/fxrstor directly,
/   and hot patch membar_producer() to use sfence instead of lock
/ - tell the processor what we're doing via %cr4
/ - allow fully fledged #XM exceptions to be generated by SSE/SSE2
/   (the default mask set in fpinit() disables them)
/ - determine the mxcsr_mask so we can avoid setting reserved bits
/
	movl	$__FP_SSE, fp_kind
	movl	$fpxsave_begin, %eax
	movl	%eax, fpsave_begin
	call	patch_sse
	mov	%cr4, %eax
	orl	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), %eax
	mov	%eax, %cr4
/
/ make other CPUs share the same cr4 settings
/
	orl	$_CONST(CR4_OSFXSR | CR4_OSXMMEXCPT), cr4_value
/
/ extract the MXCSR_MASK field from our first fxsave
/
/ XMM_ALIGN extra bytes are reserved so that an aligned save area can
/ be found inside the reservation; %eax points at that aligned area.
/
	subl	$FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
	movl	%esp, %eax
	addl	$XMM_ALIGN, %eax
	andl	$_BITNOT(XMM_ALIGN-1), %eax	/* 16-byte alignment */
	movl	$0, FXSAVE_STATE_MXCSR_MASK(%eax)
	fxsave	(%eax)
	movl	FXSAVE_STATE_MXCSR_MASK(%eax), %eax
	addl	$FXSAVE_STATE_SIZE + XMM_ALIGN, %esp
	cmpl	$0, %eax
	je	1f			/ default mask value set in fpu.c
	movl	%eax, sse_mxcsr_mask	/ override mask set here
1:	testl	$X86_SSE2, x86_feature	/ can we do SSE2?
	je	mathchip
/
/ aha .. we have an SSE2-capable chip
/
/ - enable pagezero and pagecopy using non-temporal instructions
/ - hot patch membar_consumer() to use lfence instead of lock
/
	movl	$1, use_sse_pagecopy	/ will now call hwblkpagecopy
	movl	$1, use_sse_pagezero	/ will now call hwblkclr
	movl	$1, use_sse_copy
	call	patch_sse2
	jmp	mathchip
/
/ No 387; we must have an 80287.
/
is287:
#if !defined(__GNUC_AS__)
	fsetpm				/ set the 80287 into protected mode
	movl	$FP_287, fp_kind	/ we have a 287 chip
#else
	movl	$FP_NO, fp_kind		/ maybe just explode here instead?
#endif
/
/ We have either a 287, 387, 486 or P5.
/ Setup cr0 to reflect the FPU hw type.
/
mathchip:
	movl	%cr0, %edx
	andl	$_BITNOT(CR0_TS|CR0_EM), %edx	/* clear emulate math bit */
	orl	$_CONST(CR0_MP|CR0_NE), %edx
	jmp	cont

/ Do not use the FPU
ignore_fpu:
	movl	$1, fpu_ignored
/ No FP hw present.
no_fpu_hw:
	movl	%cr0, %edx
	andl	$_BITNOT(CR0_MP), %edx	/* clear math chip present */
	movl	$FP_NO, fp_kind		/ signify that there is no FPU
	movl	$0, fpu_exists		/ no FPU present
cont:
	movl	%edx, %cr0		/ set machine status word
	ret
	SET_SIZE(fpu_probe)

/*
 * HOT_PATCH(srcaddr, dstaddr, size)
 *
 * Walks `size' bytes starting at srcaddr and, for each one, calls
 * hot_patch_kernel_text with three stack arguments: the current
 * destination address, the source byte (zero-extended) and the
 * constant 1.  Source and destination both advance by one per call.
 *
 * The expansion overwrites %esi, %edi, %ebx and %eax and uses the
 * local label 0, so callers must save those registers as needed.
 */
#define	HOT_PATCH(srcaddr, dstaddr, size)	\
	movl	$srcaddr, %esi;			\
	movl	$dstaddr, %edi;			\
	movl	$size, %ebx;			\
0:	pushl	$1;				\
	movzbl	(%esi), %eax;			\
	pushl	%eax;				\
	pushl	%edi;				\
	call	hot_patch_kernel_text;		\
	addl	$12, %esp;			\
	inc	%edi;				\
	inc	%esi;				\
	dec	%ebx;				\
	test	%ebx, %ebx;			\
	jne	0b

/*
 * To cope with processors that do not implement fxsave/fxrstor
 * instructions, patch hot paths in the kernel to use them only
 * when that feature has been detected.
*/
/*
 * void patch_sse(void)
 *
 * Applies three HOT_PATCH copies, taking the replacement bytes from the
 * templates that follow the ret below.  HOT_PATCH loads %esi, %edi and
 * %ebx, so those registers are saved on entry and restored on exit.
 */

	ENTRY_NP(patch_sse)
	push	%ebp
	mov	%esp, %ebp
	push	%ebx
	push	%esi
	push	%edi
	/
	/	frstor (%eax); nop	-> fxrstor (%eax)
	/
	HOT_PATCH(_fxrstor_eax_insn, _patch_fxrstor_eax, 3)
	/
	/	nop; nop; nop		-> ldmxcsr (%ebx)
	/
	HOT_PATCH(_ldmxcsr_ebx_insn, _patch_ldmxcsr_ebx, 3)
	/
	/	lock; xorl $0, (%esp)	-> sfence; ret
	/
	HOT_PATCH(_sfence_ret_insn, _patch_sfence_ret, 4)
	pop	%edi
	pop	%esi
	pop	%ebx
	mov	%ebp, %esp
	pop	%ebp
	ret
/
/ Replacement instruction templates.  They sit after the ret above and
/ are only referenced as the copy source of the HOT_PATCH invocations;
/ the byte counts passed to HOT_PATCH must match their encoded sizes.
/
_fxrstor_eax_insn:			/ see ndptrap_frstor()
	fxrstor	(%eax)
_ldmxcsr_ebx_insn:			/ see resume_from_zombie()
	ldmxcsr	(%ebx)
_sfence_ret_insn:			/ see membar_producer()
	.byte	0xf, 0xae, 0xf8		/ [sfence instruction]
	ret
	SET_SIZE(patch_sse)

/*
 * Ditto, but this time for functions that depend upon SSE2 extensions
 *
 * void patch_sse2(void)
 *
 * A single HOT_PATCH copies the 4-byte lfence; ret template below over
 * _patch_lfence_ret.  Register save/restore mirrors patch_sse.
 */
	ENTRY_NP(patch_sse2)
	push	%ebp
	mov	%esp, %ebp
	push	%ebx
	push	%esi
	push	%edi
	/
	/	lock; xorl $0, (%esp)	-> lfence; ret
	/
	HOT_PATCH(_lfence_ret_insn, _patch_lfence_ret, 4)
	pop	%edi
	pop	%esi
	pop	%ebx
	mov	%ebp, %esp
	pop	%ebp
	ret
_lfence_ret_insn:			/ see membar_consumer()
	.byte	0xf, 0xae, 0xe8		/ [lfence instruction]
	ret
	SET_SIZE(patch_sse2)

#endif	/* __i386 */
#endif	/* __lint */


/*
 * One of these routines is called from any lwp with floating
 * point context as part of the prolog of a context switch; the
 * routine starts the floating point state save operation.
 * The completion of the save is forced by an fwait just before
 * we truly switch contexts.
*/
/*
 * void fpnsave_begin(void *arg)	(i386 only; uses fnsave)
 * void fpxsave_begin(void *arg)	(uses fxsave)
 *
 * arg points at a struct fpu_ctx.  Nothing is saved unless the context's
 * flags word is exactly FPU_EN.  Otherwise the register state is stored
 * at offset FPU_CTX_FPU_REGS within the context.  The fxsave variants
 * also issue fnclex afterwards.
 */

#if defined(__lint)

/*ARGSUSED*/
void
fpnsave_begin(void *arg)
{}

/*ARGSUSED*/
void
fpxsave_begin(void *arg)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpxsave_begin)
	movl	FPU_CTX_FPU_FLAGS(%rdi), %edx
	cmpl	$FPU_EN, %edx
	jne	1f
#if FPU_CTX_FPU_REGS != 0
	/*
	 * The offset must be an immediate; without the $ prefix the
	 * assembler treats it as a memory operand at that address.
	 */
	addq	$FPU_CTX_FPU_REGS, %rdi
#endif
	fxsave	(%rdi)
	fnclex				/* clear pending x87 exceptions */
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_begin)

#elif defined(__i386)

	ENTRY_NP(fpnsave_begin)
	mov	4(%esp), %eax		/ a struct fpu_ctx *
	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
	cmpl	$FPU_EN, %edx
	jne	1f
#if FPU_CTX_FPU_REGS != 0
	addl	$FPU_CTX_FPU_REGS, %eax	/ immediate offset, not a memory operand
#endif
	fnsave	(%eax)
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpnsave_begin)

	ENTRY_NP(fpxsave_begin)
	mov	4(%esp), %eax		/ a struct fpu_ctx *
	mov	FPU_CTX_FPU_FLAGS(%eax), %edx
	cmpl	$FPU_EN, %edx
	jne	1f
#if FPU_CTX_FPU_REGS != 0
	addl	$FPU_CTX_FPU_REGS, %eax	/ immediate offset, not a memory operand
#endif
	fxsave	(%eax)
	fnclex				/ Clear pending x87 exceptions
1:	rep;	ret	/* use 2 byte return instruction when branch target */
			/* AMD Software Optimization Guide - Section 6.2 */
	SET_SIZE(fpxsave_begin)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * void fpsave(struct fnsave_state *f)		(i386 only)
 * void fpxsave(struct fxsave_state *f)
 *
 * Save the FPU state into *f and wait for the save to complete, then
 * set CR0_TS.  TS is cleared first so the save instruction can run.
 * The fxsave variant follows the save with fnclex and fninit.
 */

#if defined(__lint)

/*ARGSUSED*/
void
fpsave(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxsave(struct fxsave_state *f)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpxsave)
	clts				/* clear TS bit in CR0 */
	fxsave	(%rdi)
	fnclex				/* clear pending x87 exceptions */
	fwait				/* wait for completion */
	fninit				/* emulate fnsave: init x87 tags */
	movq	%cr0, %rax
	orq	$CR0_TS, %rax
	movq	%rax, %cr0		/* set TS bit in CR0 (disable FPU) */
	ret
	SET_SIZE(fpxsave)

#elif defined(__i386)

	ENTRY_NP(fpsave)
	clts				/ clear TS bit in CR0
	movl	4(%esp), %eax		/ load save address
	fnsave	(%eax)
	fwait				/ wait for completion
	movl	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/ set TS bit in CR0 (disable FPU)
	ret
	SET_SIZE(fpsave)

ENTRY_NP(fpxsave)
/
/ i386 fpxsave(struct fxsave_state *f): fxsave the state into *f, wait
/ for it to finish, reinitialize the x87 and then set CR0_TS.
/
	movl	4(%esp), %eax		/ %eax = save area address
	clts				/ TS must be clear before touching the FPU
	fxsave	(%eax)
	fnclex				/ drop any pending x87 exceptions
	fwait				/ wait for completion
	fninit				/ emulate fnsave: init x87 tag words
	movl	%cr0, %eax
	orl	$CR0_TS, %eax
	movl	%eax, %cr0		/ TS set again: FPU disabled
	ret
	SET_SIZE(fpxsave)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * void fprestore(struct fnsave_state *f)	(i386 only; uses frstor)
 * void fpxrestore(struct fxsave_state *f)	(uses fxrstor)
 *
 * Clear CR0_TS and reload the FPU state from *f.
 */

#if defined(__lint)

/*ARGSUSED*/
void
fprestore(struct fnsave_state *f)
{}

/*ARGSUSED*/
void
fpxrestore(struct fxsave_state *f)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpxrestore)
	clts				/* FPU usable again: TS cleared */
	fxrstor	(%rdi)			/* reload state from *f */
	ret
	SET_SIZE(fpxrestore)

#elif defined(__i386)

	ENTRY_NP(fprestore)
	movl	4(%esp), %eax		/ %eax = restore area address
	clts				/ clear TS bit in CR0
	frstor	(%eax)
	ret
	SET_SIZE(fprestore)

	ENTRY_NP(fpxrestore)
	movl	4(%esp), %eax		/ %eax = restore area address
	clts				/ clear TS bit in CR0
	fxrstor	(%eax)
	ret
	SET_SIZE(fpxrestore)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Disable the floating point unit.
 *
 * void fpdisable(void)
 *
 * Done by setting the TS bit in %cr0; no FPU state is saved here.
 */

#if defined(__lint)

void
fpdisable(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpdisable)
	movq	%cr0, %rax
	orq	$CR0_TS, %rax		/* add TS to the current %cr0 value */
	movq	%rax, %cr0		/* FPU is now disabled */
	ret
	SET_SIZE(fpdisable)

#elif defined(__i386)

	ENTRY_NP(fpdisable)
	movl	%cr0, %eax
	orl	$CR0_TS, %eax		/ add TS to the current %cr0 value
	movl	%eax, %cr0		/ FPU is now disabled
	ret
	SET_SIZE(fpdisable)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Initialize the fpu hardware.
 *
 * void fpinit(void)
 *
 * Clears CR0_TS and loads a clean register image: sse_initial via
 * fxrstor, or (i386 with fp_kind != __FP_SSE) fninit followed by
 * x87_initial via frstor.
 */

#if defined(__lint)

void
fpinit(void)
{}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fpinit)
	leaq	sse_initial(%rip), %rax	/* address of the clean image */
	clts				/* clear TS bit in CR0 */
	fxrstor	(%rax)			/* load clean initial state */
	ret
	SET_SIZE(fpinit)

#elif defined(__i386)

	ENTRY_NP(fpinit)
	clts				/ clear TS bit in CR0
	cmpl	$__FP_SSE, fp_kind
	jne	1f			/ not SSE: take the x87 path
	movl	$sse_initial, %eax
	fxrstor	(%eax)			/ load clean initial state
	ret
1:
	fninit				/ initialize the chip
	movl	$x87_initial, %eax
	frstor	(%eax)			/ load clean initial state
	ret
	SET_SIZE(fpinit)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * Clears FPU exception state.
* Returns the FP status word.
 *
 * uint32_t fperr_reset(void)	- x87: returns the status word read by
 *				  fnstsw (upper half of %eax zero),
 *				  then issues fnclex.
 * uint32_t fpxerr_reset(void)	- SSE: returns the MXCSR value as read,
 *				  then reloads MXCSR with the
 *				  SSE_MXCSR_EFLAGS bits cleared.
 */

#if defined(__lint)

uint32_t
fperr_reset(void)
{
	return (0);
}

uint32_t
fpxerr_reset(void)
{
	return (0);
}

#else	/* __lint */

#if defined(__amd64)

	ENTRY_NP(fperr_reset)
	clts				/* clear TS bit in CR0 */
	xorl	%eax, %eax		/* fnstsw only writes %ax */
	fnstsw	%ax			/* return value = status word */
	fnclex				/* clear processor exceptions */
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	clts				/* clear TS bit in CR0 */
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* scratch slot for MXCSR */
	stmxcsr	(%rsp)			/* get status */
	movl	(%rsp), %eax		/* return the value as it was */
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%rsp)
	ldmxcsr	(%rsp)			/* clear processor exceptions */
	leave
	ret
	SET_SIZE(fpxerr_reset)

#elif defined(__i386)

	ENTRY_NP(fperr_reset)
	clts				/ clear TS bit in CR0
	xorl	%eax, %eax		/ fnstsw only writes %ax
	fnstsw	%ax			/ return value = status word
	fnclex				/ clear processor exceptions
	ret
	SET_SIZE(fperr_reset)

	ENTRY_NP(fpxerr_reset)
	subl	$4, %esp		/ scratch slot for MXCSR
	clts				/ clear TS bit in CR0
	stmxcsr	(%esp)			/ get status
	movl	(%esp), %eax		/ return the value as it was
	andl	$_BITNOT(SSE_MXCSR_EFLAGS), (%esp)
	ldmxcsr	(%esp)			/ clear processor exceptions
	addl	$4, %esp
	ret
	SET_SIZE(fpxerr_reset)

#endif	/* __i386 */
#endif	/* __lint */

/*
 * uint32_t fpgetcwsw(void)
 *
 * Returns the x87 status word in the low 16 bits and the x87 control
 * word in the high 16 bits.  The two words are stored next to each
 * other in a stack temporary and read back as one 32-bit value.
 */

#if defined(__lint)

uint32_t
fpgetcwsw(void)
{
	return (0);
}

#else   /* __lint */

#if defined(__amd64)

	ENTRY_NP(fpgetcwsw)
	clts				/* clear TS bit in CR0 */
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* make some temporary space */
	fnstsw	(%rsp)			/* bytes 0-1: status word */
	fnstcw	2(%rsp)			/* bytes 2-3: control word */
	movl	(%rsp), %eax		/* put both in %eax */
	leave
	ret
	SET_SIZE(fpgetcwsw)

#elif defined(__i386)

	ENTRY_NP(fpgetcwsw)
	subl	$4, %esp		/* make some temporary space */
	clts				/* clear TS bit in CR0 */
	fnstsw	(%esp)			/* bytes 0-1: status word */
	fnstcw	2(%esp)			/* bytes 2-3: control word */
	movl	(%esp), %eax		/* put both in %eax */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetcwsw)

#endif	/* __i386 */
#endif  /* __lint */

/*
 * Returns the MXCSR register.
*/
/*
 * uint32_t fpgetmxcsr(void)
 *
 * Clears CR0_TS, stores MXCSR into a stack temporary with stmxcsr and
 * returns that 32-bit value.  TS is left clear on return.
 */

#if defined(__lint)

uint32_t
fpgetmxcsr(void)
{
	return (0);
}

#else   /* __lint */

#if defined(__amd64)

	ENTRY_NP(fpgetmxcsr)
	clts				/* clear TS bit in CR0 */
	pushq	%rbp
	movq	%rsp, %rbp
	subq	$0x10, %rsp		/* scratch slot for MXCSR */
	stmxcsr	(%rsp)			/* get status */
	movl	(%rsp), %eax		/* return value */
	leave
	ret
	SET_SIZE(fpgetmxcsr)

#elif defined(__i386)

	ENTRY_NP(fpgetmxcsr)
	subl	$4, %esp		/* scratch slot for MXCSR */
	clts				/* clear TS bit in CR0 */
	stmxcsr	(%esp)			/* get status */
	movl	(%esp), %eax		/* return value */
	addl	$4, %esp
	ret
	SET_SIZE(fpgetmxcsr)

#endif	/* __i386 */
#endif  /* __lint */