/*-
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1990 Andrew Moore, Talke Studio
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#) ieeefp.h	1.0 (Berkeley) 9/23/93
 * $FreeBSD$
 */

/*
 *	IEEE floating point type and constant definitions.
 */

#ifndef _MACHINE_IEEEFP_H_
#define _MACHINE_IEEEFP_H_

#ifndef _SYS_CDEFS_H_
#error this file needs sys/cdefs.h as a prerequisite
#endif

/*
 * FP rounding modes
 */
typedef enum {
	FP_RN=0,	/* round to nearest */
	FP_RM,		/* round down to minus infinity */
	FP_RP,		/* round up to plus infinity */
	FP_RZ		/* truncate */
} fp_rnd_t;

/*
 * FP precision modes
 */
typedef enum {
	FP_PS=0,	/* 24 bit (single-precision) */
	FP_PRS,		/* reserved */
	FP_PD,		/* 53 bit (double-precision) */
	FP_PE		/* 64 bit (extended-precision) */
} fp_prec_t;

#define fp_except_t	int

/*
 * FP exception masks
 */
#define FP_X_INV	0x01	/* invalid operation */
#define FP_X_DNML	0x02	/* denormal */
#define FP_X_DZ		0x04	/* zero divide */
#define FP_X_OFL	0x08	/* overflow */
#define FP_X_UFL	0x10	/* underflow */
#define FP_X_IMP	0x20	/* (im)precision */
#define FP_X_STK	0x40	/* stack fault */

/*
 * FP registers
 */
#define FP_MSKS_REG	0	/* exception masks */
#define FP_PRC_REG	0	/* precision */
#define FP_RND_REG	0	/* direction */
#define FP_STKY_REG	1	/* sticky flags */

/*
 * FP register bit field masks
 */
#define FP_MSKS_FLD	0x3f	/* exception masks field */
#define FP_PRC_FLD	0x300	/* precision control field */
#define FP_RND_FLD	0xc00	/* round control field */
#define FP_STKY_FLD	0x3f	/* sticky flags field */

/*
 * SSE mxcsr register bit field masks
 */
#define	SSE_STKY_FLD	0x3f	/* exception flags */
#define	SSE_DAZ_FLD	0x40	/* Denormals are zero */
#define	SSE_MSKS_FLD	0x1f80	/* exception masks field */
#define	SSE_RND_FLD	0x6000	/* rounding control */
#define	SSE_FZ_FLD	0x8000	/* flush to zero on underflow */

/*
 * FP register bit field offsets
 */
#define FP_MSKS_OFF	0	/* exception masks offset */
#define FP_PRC_OFF	8	/* precision control offset */
#define FP_RND_OFF	10	/* round control offset */
#define FP_STKY_OFF	0	/* sticky flags offset */

/*
 * SSE mxcsr register bit field offsets
 */
#define	SSE_STKY_OFF	0	/* exception flags offset */
#define	SSE_DAZ_OFF	6	/* DAZ exception mask offset */
#define	SSE_MSKS_OFF	7	/* other exception masks offset */
#define	SSE_RND_OFF	13	/* rounding control offset */
#define	SSE_FZ_OFF	15	/* flush to zero offset */

#if defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE__) \
    && !defined(__cplusplus)

/*
 * Thin wrappers around the x87/SSE state load and store instructions.
 * `addr' must point at an object that is as large as the state being
 * transferred, because `*(addr)' is what the compiler is told the
 * instruction touches.
 *
 * The store forms (fnst*, stmxcsr) write memory, so their operand is an
 * output ("=m").  The load forms (fld*, ldmxcsr) only read memory, so
 * their operand must be an input ("m"); declaring it as an output would
 * permit the compiler to discard the value that was just computed for
 * loading.
 */
#define	__fldenv(addr)	__asm __volatile("fldenv %0" : : "m" (*(addr)))
#define	__fnstenv(addr)	__asm __volatile("fnstenv %0" : "=m" (*(addr)))
#define	__fldcw(addr)	__asm __volatile("fldcw %0" : : "m" (*(addr)))
#define	__fnstcw(addr)	__asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define	__fnstsw(addr)	__asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define	__ldmxcsr(addr)	__asm __volatile("ldmxcsr %0" : : "m" (*(addr)))
#define	__stmxcsr(addr)	__asm __volatile("stmxcsr %0" : "=m" (*(addr)))

/*
 * General notes about conflicting SSE vs FP status bits.
 * This code assumes that software will not fiddle with the control
 * bits of the SSE and x87 in such a way to get them out of sync and
 * still expect this to work.  Break this at your peril.
 * Because I based this on the i386 port, the x87 state is used for
 * the fpget*() functions, and is shadowed into the SSE state for
 * the fpset*() functions.  For dual source fpget*() functions, I
 * merge the two together.  I think.
 */

/*
 * Get rounding control.
 * Only the x87 control word is read.
 */
static __inline__ fp_rnd_t
__fpgetround(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((_cw & FP_RND_FLD) >> FP_RND_OFF);
}

/*
 * Set rounding control in both the x87 control word and the SSE mxcsr.
 * Returns the previous rounding mode as found in the x87 control word.
 */
static __inline__ fp_rnd_t
__fpsetround(fp_rnd_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_rnd_t _p;

	__fnstcw(&_cw);
	_p = (_cw & FP_RND_FLD) >> FP_RND_OFF;
	_cw &= ~FP_RND_FLD;
	_cw |= (_m << FP_RND_OFF) & FP_RND_FLD;
	__fldcw(&_cw);
	__stmxcsr(&_mxcsr);
	_mxcsr &= ~SSE_RND_FLD;
	_mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * Get precision for fadd/fsub/fsqrt etc x87 instructions
 * There is no equivalent SSE mode or control.
 */
static __inline__ fp_prec_t
__fpgetprec(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((_cw & FP_PRC_FLD) >> FP_PRC_OFF);
}

/*
 * Set precision for fadd/fsub/fsqrt etc x87 instructions and return the
 * previous setting.  The argument is a precision mode (fp_prec_t), which
 * matches the fpsetprec() prototype declared further down in this file.
 */
static __inline__ fp_prec_t
__fpsetprec(fp_prec_t _m)
{
	unsigned short _cw;
	fp_prec_t _p;

	__fnstcw(&_cw);
	_p = (_cw & FP_PRC_FLD) >> FP_PRC_OFF;
	_cw &= ~FP_PRC_FLD;
	_cw |= (_m << FP_PRC_OFF) & FP_PRC_FLD;
	__fldcw(&_cw);
	return (_p);
}

/*
 * Look at the exception masks
 * Note that x87 masks are inverse of the fp*() functions
 * API.  ie: mask = 1 means disable for x87 and SSE, but
 * for the fp*() api, mask = 1 means enabled.
 */
static __inline__ fp_except_t
__fpgetmask(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((~_cw) & FP_MSKS_FLD);
}

/*
 * Replace the set of enabled exceptions in both the x87 control word and
 * the SSE mxcsr.  Returns the previously enabled set as found in the x87
 * control word.
 */
static __inline__ fp_except_t
__fpsetmask(fp_except_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_except_t _p;

	__fnstcw(&_cw);
	_p = (~_cw) & FP_MSKS_FLD;
	_cw &= ~FP_MSKS_FLD;
	_cw |= (~_m) & FP_MSKS_FLD;
	__fldcw(&_cw);
	__stmxcsr(&_mxcsr);
	/* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */
	_mxcsr &= ~SSE_MSKS_FLD;
	/*
	 * Reduce to the six mask bits before shifting: ~_m is normally
	 * negative and left-shifting a negative int is undefined.
	 */
	_mxcsr |= (((~_m) & FP_MSKS_FLD) << SSE_MSKS_OFF) & SSE_MSKS_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * See which sticky exceptions are pending.  The x87 and SSE flags are
 * merged; neither set is modified here.
 */
static __inline__ fp_except_t
__fpgetsticky(void)
{
	unsigned short _sw;
	unsigned int _mxcsr;
	fp_except_t _ex;

	__fnstsw(&_sw);
	_ex = _sw & FP_STKY_FLD;
	__stmxcsr(&_mxcsr);
	_ex |= _mxcsr & SSE_STKY_FLD;
	return (_ex);
}

/*
 * Note that this should really be called fpresetsticky()
 *
 * Clears the sticky flags selected by _m in both the x87 status word and
 * the SSE mxcsr.  The return value is the x87 flags that were set among
 * those requested, merged with every SSE flag that was set.
 */
static __inline__ fp_except_t
__fpsetsticky(fp_except_t _m)
{
	/*
	 * The environment words live in a struct so that `*(addr)' in
	 * __fnstenv()/__fldenv() describes the complete image to the
	 * compiler instead of only its first word.
	 */
	struct {
		unsigned _w[7];
	} _env;
	unsigned int _mxcsr;
	fp_except_t _p;

	/*
	 * Confine the request to the sticky flag field.  Any other bit
	 * would be cleared in the x87 status word and in mxcsr as well,
	 * where the neighbouring bits are not exception flags (see the
	 * SSE_*_FLD definitions above).
	 */
	_m &= FP_STKY_FLD;
	__fnstenv(&_env);
	_p = _env._w[FP_STKY_REG] & _m;
	__stmxcsr(&_mxcsr);
	_p |= _mxcsr & SSE_STKY_FLD;
	_env._w[FP_STKY_REG] &= ~_m;
	__fldenv(&_env);
	_mxcsr &= ~_m;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

#endif /* __GNUCLIKE_ASM && __CC_SUPPORTS___INLINE__ && !__cplusplus */

#if !defined(__IEEEFP_NOINLINES__) && !defined(__cplusplus) \
    && defined(__GNUCLIKE_ASM) && defined(__CC_SUPPORTS___INLINE__)

#define	fpgetround()	__fpgetround()
#define	fpsetround(_m)	__fpsetround(_m)
#define	fpgetprec()	__fpgetprec()
#define	fpsetprec(_m)	__fpsetprec(_m)
#define	fpgetmask()	__fpgetmask()
#define	fpsetmask(_m)	__fpsetmask(_m)
#define	fpgetsticky()	__fpgetsticky()
#define	fpsetsticky(_m)	__fpsetsticky(_m)

/* Suppress prototypes in the MI header. */
#define	_IEEEFP_INLINED_	1

#else /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUCLIKE_ASM
         && __CC_SUPPORTS___INLINE__ */

/* Augment the userland declarations */
__BEGIN_DECLS
extern fp_prec_t fpgetprec(void);
extern fp_prec_t fpsetprec(fp_prec_t);
__END_DECLS

#endif /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUCLIKE_ASM
          && __CC_SUPPORTS___INLINE__ */

#endif /* !_MACHINE_IEEEFP_H_ */