/*-
 * Copyright (c) 1990 Andrew Moore, Talke Studio
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	from: @(#) ieeefp.h 	1.0 (Berkeley) 9/23/93
 * $FreeBSD$
 */

/*
 *	IEEE floating point type and constant definitions.
 */

#ifndef _MACHINE_IEEEFP_H_
#define _MACHINE_IEEEFP_H_

/*
 * FP rounding modes
 */
typedef enum {
	FP_RN=0,	/* round to nearest */
	FP_RM,		/* round down to minus infinity */
	FP_RP,		/* round up to plus infinity */
	FP_RZ		/* truncate */
} fp_rnd_t;

/*
 * FP precision modes
 */
typedef enum {
	FP_PS=0,	/* 24 bit (single-precision) */
	FP_PRS,		/* reserved */
	FP_PD,		/* 53 bit (double-precision) */
	FP_PE		/* 64 bit (extended-precision) */
} fp_prec_t;

#define fp_except_t	int

/*
 * FP exception masks
 */
#define FP_X_INV	0x01	/* invalid operation */
#define FP_X_DNML	0x02	/* denormal */
#define FP_X_DZ		0x04	/* zero divide */
#define FP_X_OFL	0x08	/* overflow */
#define FP_X_UFL	0x10	/* underflow */
#define FP_X_IMP	0x20	/* (im)precision */
#define FP_X_STK	0x40	/* stack fault */

/*
 * FP registers (word index into the image stored by fnstenv)
 */
#define FP_MSKS_REG	0	/* exception masks */
#define FP_PRC_REG	0	/* precision */
#define FP_RND_REG	0	/* direction */
#define FP_STKY_REG	1	/* sticky flags */

/*
 * FP register bit field masks
 */
#define FP_MSKS_FLD	0x3f	/* exception masks field */
#define FP_PRC_FLD	0x300	/* precision control field */
#define FP_RND_FLD	0xc00	/* round control field */
#define FP_STKY_FLD	0x3f	/* sticky flags field */

/*
 * SSE mxcsr register bit field masks
 */
#define SSE_STKY_FLD	0x3f	/* exception flags */
#define SSE_DAZ_FLD	0x40	/* Denormals are zero */
#define SSE_MSKS_FLD	0x1f80	/* exception masks field */
#define SSE_RND_FLD	0x6000	/* rounding control */
#define SSE_FZ_FLD	0x8000	/* flush to zero on underflow */

/*
 * FP register bit field offsets
 */
#define FP_MSKS_OFF	0	/* exception masks offset */
#define FP_PRC_OFF	8	/* precision control offset */
#define FP_RND_OFF	10	/* round control offset */
#define FP_STKY_OFF	0	/* sticky flags offset */

/*
 * SSE mxcsr register bit field offsets
 */
#define SSE_STKY_OFF	0	/* exception flags offset */
#define SSE_DAZ_OFF	6	/* denormals-are-zero offset */
#define SSE_MSKS_OFF	7	/* other exception masks offset */
#define SSE_RND_OFF	13	/* rounding control offset */
#define SSE_FZ_OFF	15	/* flush to zero offset */

#if defined(__GNUC__) && !defined(__cplusplus)

/*
 * Thin wrappers around the x87/SSE state load and store instructions.
 *
 * The store forms (fnst*, stmxcsr) write *addr, so it is an output operand.
 * The load forms (fld*, ldmxcsr) only read *addr, so it must be an input
 * operand: declaring it as an output tells the compiler the previous
 * contents are dead, which lets it drop the stores that prepared the value
 * being loaded.
 */
#define __fldenv(addr)	__asm __volatile("fldenv %0" : : "m" (*(addr)))
#define __fnstenv(addr)	__asm __volatile("fnstenv %0" : "=m" (*(addr)))
#define __fldcw(addr)	__asm __volatile("fldcw %0" : : "m" (*(addr)))
#define __fnstcw(addr)	__asm __volatile("fnstcw %0" : "=m" (*(addr)))
#define __fnstsw(addr)	__asm __volatile("fnstsw %0" : "=m" (*(addr)))
#define __ldmxcsr(addr)	__asm __volatile("ldmxcsr %0" : : "m" (*(addr)))
#define __stmxcsr(addr)	__asm __volatile("stmxcsr %0" : "=m" (*(addr)))

/*
 * General notes about conflicting SSE vs FP status bits.
 * This code assumes that software will not fiddle with the control
 * bits of the SSE and x87 in such a way to get them out of sync and
 * still expect this to work.  Break this at your peril.
 * Because I based this on the i386 port, the x87 state is used for
 * the fpget*() functions, and is shadowed into the SSE state for
 * the fpset*() functions.  For dual source fpget*() functions, I
 * merge the two together.  I think.
 */

/*
 * Get rounding control.
 * Returns the rounding mode held in the x87 control word; the SSE copy
 * is not consulted.
 */
static __inline__ fp_rnd_t
__fpgetround(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((_cw & FP_RND_FLD) >> FP_RND_OFF);
}

/*
 * Set rounding control.
 * Writes _m into the rounding field of both the x87 control word and the
 * SSE mxcsr register.  Returns the previous x87 rounding mode.
 */
static __inline__ fp_rnd_t
__fpsetround(fp_rnd_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_rnd_t _p;

	__fnstcw(&_cw);
	_p = (_cw & FP_RND_FLD) >> FP_RND_OFF;
	_cw &= ~FP_RND_FLD;
	_cw |= (_m << FP_RND_OFF) & FP_RND_FLD;
	__fldcw(&_cw);
	/* Shadow the same mode into the SSE state. */
	__stmxcsr(&_mxcsr);
	_mxcsr &= ~SSE_RND_FLD;
	_mxcsr |= (_m << SSE_RND_OFF) & SSE_RND_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * Get precision for fadd/fsub/fsqrt etc x87 instructions.
 * There is no equivalent SSE mode or control.
 */
static __inline__ fp_prec_t
__fpgetprec(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((_cw & FP_PRC_FLD) >> FP_PRC_OFF);
}

/*
 * Set precision for fadd/fsub/fsqrt etc x87 instructions.
 * Only the x87 control word is updated.  Returns the previous precision.
 */
static __inline__ fp_prec_t
__fpsetprec(fp_prec_t _m)
{
	unsigned short _cw;
	fp_prec_t _p;

	__fnstcw(&_cw);
	_p = (_cw & FP_PRC_FLD) >> FP_PRC_OFF;
	_cw &= ~FP_PRC_FLD;
	_cw |= (_m << FP_PRC_OFF) & FP_PRC_FLD;
	__fldcw(&_cw);
	return (_p);
}

/*
 * Look at the exception masks.
 * Note that x87 masks are inverse of the fp*() functions
 * API.  ie: mask = 1 means disable for x87 and SSE, but
 * for the fp*() api, mask = 1 means enabled.
 */
static __inline__ fp_except_t
__fpgetmask(void)
{
	unsigned short _cw;

	__fnstcw(&_cw);
	return ((~_cw) & FP_MSKS_FLD);
}

/*
 * Set the exception masks (fp*() sense: 1 = enabled) in both the x87
 * control word and the SSE mxcsr register.  Returns the previous x87
 * setting, also in fp*() sense.
 */
static __inline__ fp_except_t
__fpsetmask(fp_except_t _m)
{
	unsigned short _cw;
	unsigned int _mxcsr;
	fp_except_t _p;

	__fnstcw(&_cw);
	_p = (~_cw) & FP_MSKS_FLD;
	_cw &= ~FP_MSKS_FLD;
	_cw |= (~_m) & FP_MSKS_FLD;
	__fldcw(&_cw);
	__stmxcsr(&_mxcsr);
	/* XXX should we clear non-ieee SSE_DAZ_FLD and SSE_FZ_FLD ? */
	_mxcsr &= ~SSE_MSKS_FLD;
	_mxcsr |= ((~_m) << SSE_MSKS_OFF) & SSE_MSKS_FLD;
	__ldmxcsr(&_mxcsr);
	return (_p);
}

/*
 * See which sticky exceptions are pending.
 * Returns the union of the x87 status word flags and the SSE mxcsr flags.
 * Nothing is reset here; use fpsetsticky() to clear flags.
 */
static __inline__ fp_except_t
__fpgetsticky(void)
{
	unsigned short _sw;
	unsigned int _mxcsr;
	fp_except_t _ex;

	__fnstsw(&_sw);
	_ex = _sw & FP_STKY_FLD;
	__stmxcsr(&_mxcsr);
	_ex |= _mxcsr & SSE_STKY_FLD;
	return (_ex);
}

/*
 * Clear the sticky exception flags selected by _m in both the x87 status
 * word and the SSE mxcsr register.  Returns the selected x87 flags merged
 * with all SSE flags, as they were before the clear.
 *
 * Note that this should really be called fpresetsticky()
 */
static __inline__ fp_except_t
__fpsetsticky(fp_except_t _m)
{
	/*
	 * Wrapped in a struct so that the whole environment image, not
	 * just its first word, is the asm memory operand.
	 */
	struct {
		unsigned _w[7];
	} _env;
	unsigned int _mxcsr;
	fp_except_t _p;

	__fnstenv(&_env);
	_p = _env._w[FP_STKY_REG] & _m;
	__stmxcsr(&_mxcsr);
	_p |= _mxcsr & SSE_STKY_FLD;
	_env._w[FP_STKY_REG] &= ~_m;
	__fldenv(&_env);
	/*
	 * Only the flag bits may be cleared in mxcsr; the bits above them
	 * are DAZ, the exception masks, rounding control and FZ.
	 */
	_mxcsr &= ~(_m & SSE_STKY_FLD);
	__ldmxcsr(&_mxcsr);
	return (_p);
}

#endif /* __GNUC__ && !__cplusplus */

#if !defined(__IEEEFP_NOINLINES__) && !defined(__cplusplus) && defined(__GNUC__)

#define fpgetround()	__fpgetround()
#define fpsetround(_m)	__fpsetround(_m)
#define fpgetprec()	__fpgetprec()
#define fpsetprec(_m)	__fpsetprec(_m)
#define fpgetmask()	__fpgetmask()
#define fpsetmask(_m)	__fpsetmask(_m)
#define fpgetsticky()	__fpgetsticky()
#define fpsetsticky(_m)	__fpsetsticky(_m)

/* Suppress prototypes in the MI header. */
#define _IEEEFP_INLINED_	1

#else /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUC__ */

/* Augment the userland declarations */
__BEGIN_DECLS
extern fp_prec_t fpgetprec(void);
extern fp_prec_t fpsetprec(fp_prec_t);
__END_DECLS

#endif /* !__IEEEFP_NOINLINES__ && !__cplusplus && __GNUC__ */

#endif /* !_MACHINE_IEEEFP_H_ */