/*-
 * Copyright (c) 2003 Peter Wemm.
 * Copyright (c) 1990 Andrew Moore, Talke Studio
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#) ieeefp.h 	1.0 (Berkeley) 9/23/93
 * $FreeBSD$
 */

/*
 *	IEEE floating point type and constant definitions.
 */

#ifndef _MACHINE_IEEEFP_H_
#define _MACHINE_IEEEFP_H_

/*
 * FP rounding modes
 */
typedef enum {
	FP_RN=0,	/* round to nearest */
	FP_RM,		/* round down to minus infinity */
	FP_RP,		/* round up to plus infinity */
	FP_RZ		/* truncate */
} fp_rnd_t;

/*
 * FP precision modes
 */
typedef enum {
	FP_PS=0,	/* 24 bit (single-precision) */
	FP_PRS,		/* reserved */
	FP_PD,		/* 53 bit (double-precision) */
	FP_PE		/* 64 bit (extended-precision) */
} fp_prec_t;

#define fp_except_t	int

/*
 * FP exception masks
 */
#define FP_X_INV	0x01	/* invalid operation */
#define FP_X_DNML	0x02	/* denormal */
#define FP_X_DZ		0x04	/* zero divide */
#define FP_X_OFL	0x08	/* overflow */
#define FP_X_UFL	0x10	/* underflow */
#define FP_X_IMP	0x20	/* (im)precision */
#define FP_X_STK	0x40	/* stack fault */

/*
 * FP registers
 */
#define FP_MSKS_REG	0	/* exception masks */
#define FP_PRC_REG	0	/* precision */
#define FP_RND_REG	0	/* direction */
#define FP_STKY_REG	1	/* sticky flags */

/*
 * FP register bit field masks
 */
#define FP_MSKS_FLD	0x3f	/* exception masks field */
#define FP_PRC_FLD	0x300	/* precision control field */
#define FP_RND_FLD	0xc00	/* round control field */
#define FP_STKY_FLD	0x3f	/* sticky flags field */

/*
 * SSE mxcsr register bit field masks
 */
#define	SSE_STKY_FLD	0x3f	/* exception flags */
#define	SSE_DAZ_FLD	0x40	/* Denormals are zero */
#define	SSE_MSKS_FLD	0x1f80	/* exception masks field */
#define	SSE_RND_FLD	0x6000	/* rounding control */
#define	SSE_FZ_FLD	0x8000	/* flush to zero on underflow */

/*
 * FP register bit field offsets
 */
#define FP_MSKS_OFF	0	/* exception masks offset */
#define FP_PRC_OFF	8	/* precision control offset */
#define FP_RND_OFF	10	/* round control offset */
#define FP_STKY_OFF	0	/* sticky flags offset */

/*
 * SSE mxcsr register bit field offsets
 */
#define	SSE_STKY_OFF	0	/* exception flags offset */
#define	SSE_DAZ_OFF	6	/* denormals-are-zero offset */
#define	SSE_MSKS_OFF	7	/* exception masks offset */
#define	SSE_RND_OFF	13	/* rounding control offset */
#define	SSE_FZ_OFF	15	/* flush to zero offset */

#if defined(__GNUC__) && !defined(__cplusplus)

/*
 * Thin wrappers around the x87/SSE control and status instructions.
 * Each takes a pointer; the pointed-to object is the memory operand.
 * Instructions that load state from memory (fldenv, fldcw, ldmxcsr) take
 * it as an input operand; instructions that store state to memory
 * (fnstenv, fnstcw, fnstsw, stmxcsr) take it as an output operand.
 * Declaring a load's operand as an output would tell the compiler the
 * value written just before the asm is dead.
 */
#define	__fldenv(addr)	__asm __volatile("fldenv %0" : : "m" (*(addr)))
#define	__fnstenv(addr)	__asm __volatile("fnstenv %0" : "=m" (*(addr)))
#define	__fldcw(addr)	__asm __volatile("fldcw %0" : : "m" (*(addr)))
#define	__fnstcw(addr)	__asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define	__fnstsw(addr)	__asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define	__ldmxcsr(addr)	__asm __volatile("ldmxcsr %0" : : "m" (*(addr)))
#define	__stmxcsr(addr)	__asm __volatile("stmxcsr %0" : "=m" (*(addr)))

/*
 * General notes about conflicting SSE vs FP status bits.
 * This code assumes that software will not fiddle with the control
 * bits of the SSE and x87 in such a way to get them out of sync and
 * still expect this to work.  Break this at your peril.
 * Because I based this on the i386 port, the x87 state is used for
 * the fpget*() functions, and is shadowed into the SSE state for
 * the fpset*() functions.  For dual source fpget*() functions, I
 * merge the two together.  I think.
 */

/*
 * Get rounding control.
 * Returns the rounding mode held in the x87 control word.
 */
static __inline__ fp_rnd_t
__fpgetround(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((_cw & FP_RND_FLD) >> FP_RND_OFF);
}

/*
 * Set rounding control.
 * Writes _m into the rounding field of both the x87 control word and the
 * SSE mxcsr.  Returns the rounding mode the x87 control word held before
 * the change.
 */
static __inline__ fp_rnd_t
__fpsetround(fp_rnd_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_rnd_t _p;

	__fnstcw(&_cw);
	_p = (_cw & FP_RND_FLD) >> FP_RND_OFF;
	_cw &= ~FP_RND_FLD;
	_cw |= (_m << FP_RND_OFF) & FP_RND_FLD;
	__fldcw(&_cw);
	/* Shadow the same mode into the SSE state. */
	__stmxcsr(&_mxcsr);
	_mxcsr &= ~SSE_RND_FLD;
	_mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * Get precision for fadd/fsub/fsqrt etc x87 instructions
 * There is no equivalent SSE mode or control.
 */
static __inline__ fp_prec_t
__fpgetprec(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((_cw & FP_PRC_FLD) >> FP_PRC_OFF);
}

/*
 * Set precision for fadd/fsub/fsqrt etc x87 instructions
 * There is no equivalent SSE mode or control.
 * Returns the precision mode in effect before the change.
 */
static __inline__ fp_prec_t
__fpsetprec(fp_prec_t _m)
{
	unsigned short _cw;
	fp_prec_t _p;

	__fnstcw(&_cw);
	_p = (_cw & FP_PRC_FLD) >> FP_PRC_OFF;
	_cw &= ~FP_PRC_FLD;
	_cw |= (_m << FP_PRC_OFF) & FP_PRC_FLD;
	__fldcw(&_cw);
	return (_p);
}

/*
 * Look at the exception masks
 * Note that x87 masks are inverse of the fp*() functions
 * API.  ie: mask = 1 means disable for x87 and SSE, but
 * for the fp*() api, mask = 1 means enabled.
 */
static __inline__ fp_except_t
__fpgetmask(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((~_cw) & FP_MSKS_FLD);
}

/*
 * Set the exception masks.
 * _m uses the fp*() API sense (bit = 1 means enabled); it is inverted
 * before being written to the x87 control word and the SSE mxcsr.
 * Returns the set of exceptions that were enabled in the x87 control
 * word before the change.
 */
static __inline__ fp_except_t
__fpsetmask(fp_except_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_except_t _p;

	__fnstcw(&_cw);
	_p = (~_cw) & FP_MSKS_FLD;
	_cw &= ~FP_MSKS_FLD;
	_cw |= (~_m) & FP_MSKS_FLD;
	__fldcw(&_cw);
	__stmxcsr(&_mxcsr);
	/* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */
	_mxcsr &= ~SSE_MSKS_FLD;
	/* Shift as unsigned: ~_m is usually negative as an int. */
	_mxcsr |= ((unsigned int)~_m << SSE_MSKS_OFF) & SSE_MSKS_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * See which sticky exceptions are pending.
 * Returns the union of the x87 status word flags and the SSE mxcsr
 * flags; neither register is modified.
 */
static __inline__ fp_except_t
__fpgetsticky(void)
{
	unsigned short _sw;
	unsigned int _mxcsr;
	fp_except_t _ex;

	__fnstsw(&_sw);
	_ex = _sw & FP_STKY_FLD;
	__stmxcsr(&_mxcsr);
	_ex |= _mxcsr & SSE_STKY_FLD;
	return (_ex);
}

/*
 * Note that this should really be called fpresetsticky()
 *
 * Clears the sticky flags selected by _m in both the x87 status word and
 * the SSE mxcsr.  The return value is the x87 flags selected by _m
 * together with all SSE sticky flags, as they were before the clear.
 */
static __inline__ fp_except_t
__fpsetsticky(fp_except_t _m)
{
	unsigned _env[7];
	unsigned int _mxcsr;
	fp_except_t _p;

	/*
	 * Pass the address of the whole array so that the asm memory
	 * operand covers all seven words of the environment, not just
	 * _env[0]; otherwise the compiler is free to treat the other
	 * words as untouched by fnstenv and unread by fldenv.
	 */
	__fnstenv(&_env);
	_p = _env[FP_STKY_REG] & _m;
	__stmxcsr(&_mxcsr);
	_p |= _mxcsr & SSE_STKY_FLD;
	_env[FP_STKY_REG] &= ~_m;
	__fldenv(&_env);
	/*
	 * Only touch the mxcsr flag bits.  Bits of _m above the sticky
	 * field alias mxcsr control bits (FP_X_STK has the same value as
	 * SSE_DAZ_FLD) and must not be cleared here.
	 */
	_mxcsr &= ~(_m & SSE_STKY_FLD);
	__ldmxcsr(&_mxcsr);
	return (_p);
}

#endif /* __GNUC__ && !__cplusplus */

#if !defined(__IEEEFP_NOINLINES__) && !defined(__cplusplus) && defined(__GNUC__)

#define	fpgetround()	__fpgetround()
#define	fpsetround(_m)	__fpsetround(_m)
#define	fpgetprec()	__fpgetprec()
#define	fpsetprec(_m)	__fpsetprec(_m)
#define	fpgetmask()	__fpgetmask()
#define	fpsetmask(_m)	__fpsetmask(_m)
#define	fpgetsticky()	__fpgetsticky()
#define	fpsetsticky(_m)	__fpsetsticky(_m)

/* Suppress prototypes in the MI header. */
#define	_IEEEFP_INLINED_	1

#else /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUC__ */

/* Augment the userland declarations */
__BEGIN_DECLS
extern fp_prec_t fpgetprec(void);
extern fp_prec_t fpsetprec(fp_prec_t);
__END_DECLS

#endif /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUC__ */

#endif /* !_MACHINE_IEEEFP_H_ */