1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright 2011 Nexenta Systems, Inc. All rights reserved. 24 */ 25 /* 26 * Copyright 2006 Sun Microsystems, Inc. All rights reserved. 27 * Use is subject to license terms. 28 */ 29 30 #pragma weak fmaf = __fmaf 31 32 #include "libm.h" 33 #include "fma.h" 34 #include "fenv_inlines.h" 35 36 #if defined(__sparc) 37 38 /* 39 * fmaf for SPARC: 32-bit single precision, big-endian 40 */ 41 float 42 __fmaf(float x, float y, float z) { 43 union { 44 unsigned i[2]; 45 double d; 46 } xy, zz; 47 unsigned u, s; 48 int exy, ez; 49 50 /* 51 * the following operations can only raise the invalid exception, 52 * and then only if either x*y is of the form Inf*0 or one of x, 53 * y, or z is a signaling NaN 54 */ 55 xy.d = (double) x * y; 56 zz.d = (double) z; 57 58 /* 59 * if the sum xy + z will be exact, just compute it and cast the 60 * result to float 61 */ 62 exy = (xy.i[0] >> 20) & 0x7ff; 63 ez = (zz.i[0] >> 20) & 0x7ff; 64 if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 || 65 ez == 0x7ff || ez == 0) { 66 return ((float) (xy.d + zz.d)); 67 } 68 69 /* 70 * collapse the tail of the smaller summand into a "sticky bit" 71 * so that the sum can be computed without error 72 */ 73 if (ez > exy) { 74 if (ez - exy < 31) { 75 u = xy.i[1]; 76 s = 2 << (ez - exy); 77 if (u & (s - 1)) 78 u |= s; 79 xy.i[1] = u & ~(s - 1); 80 } else if (ez - exy < 51) { 81 u = xy.i[0]; 82 s = 1 << (ez - exy - 31); 83 if ((u & (s - 1)) | xy.i[1]) 84 u |= s; 85 xy.i[0] = u & ~(s - 1); 86 xy.i[1] = 0; 87 } else { 88 /* collapse all of xy into a single bit */ 89 xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20); 90 xy.i[1] = 0; 91 } 92 } else { 93 if (exy - ez < 31) { 94 u = zz.i[1]; 95 s = 2 << (exy - ez); 96 if (u & (s - 1)) 97 u |= s; 98 zz.i[1] = u & ~(s - 1); 99 } else if (exy - ez < 51) { 100 u = zz.i[0]; 101 s = 1 << (exy - ez - 31); 102 if ((u & (s - 1)) | zz.i[1]) 103 u |= s; 104 zz.i[0] = u & ~(s - 1); 105 zz.i[1] = 0; 106 } else { 107 /* collapse all of zz into a single bit */ 108 zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20); 109 zz.i[1] = 0; 110 } 111 } 112 113 return ((float) (xy.d + zz.d)); 114 } 115 116 #elif defined(__x86) 117 118 #if defined(__amd64) 119 #define NI 4 120 #else 121 #define NI 3 122 #endif 123 124 /* 125 * fmaf for x86: 32-bit single precision, little-endian 126 */ 127 float 128 __fmaf(float x, float y, float z) { 129 union { 130 unsigned i[NI]; 131 long double e; 132 } xy, zz; 133 unsigned u, s, cwsw, oldcwsw; 134 int exy, ez; 135 136 /* set rounding precision to 64 bits */ 137 __fenv_getcwsw(&oldcwsw); 138 cwsw = (oldcwsw & 0xfcffffff) | 0x03000000; 139 __fenv_setcwsw(&cwsw); 140 141 /* 142 * the following operations can only raise the invalid exception, 143 * and then only if either x*y is of the form Inf*0 or one of x, 144 * y, or z is a signaling NaN 145 */ 146 xy.e = (long double) x * y; 147 zz.e = (long double) z; 148 149 /* 150 * if the sum xy + z will be exact, just compute it and cast the 151 * result to float 152 */ 153 exy = xy.i[2] & 0x7fff; 154 ez = zz.i[2] & 0x7fff; 155 if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 || 156 ez == 0x7fff || ez == 0) { 157 goto cont; 158 } 159 160 /* 161 * collapse the tail of the smaller summand into a "sticky bit" 162 * so that the sum can be computed without error 163 */ 164 if (ez > exy) { 165 if (ez - exy < 31) { 166 u = xy.i[0]; 167 s = 2 << (ez - exy); 168 if (u & (s - 1)) 169 u |= s; 170 xy.i[0] = u & ~(s - 1); 171 } else if (ez - exy < 62) { 172 u = xy.i[1]; 173 s = 1 << (ez - exy - 31); 174 if ((u & (s - 1)) | xy.i[0]) 175 u |= s; 176 xy.i[1] = u & ~(s - 1); 177 xy.i[0] = 0; 178 } else { 179 /* collapse all of xy into a single bit */ 180 xy.i[0] = 0; 181 xy.i[1] = 0x80000000; 182 xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62); 183 } 184 } else { 185 if (exy - ez < 62) { 186 u = zz.i[1]; 187 s = 1 << (exy - ez - 31); 188 if ((u & (s - 1)) | zz.i[0]) 189 u |= s; 190 zz.i[1] = u & ~(s - 1); 191 zz.i[0] = 0; 192 } else { 193 /* collapse all of zz into a single bit */ 194 zz.i[0] = 0; 195 zz.i[1] = 0x80000000; 196 zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62); 197 } 198 } 199 200 cont: 201 xy.e += zz.e; 202 203 /* restore the rounding precision */ 204 __fenv_getcwsw(&cwsw); 205 cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000); 206 __fenv_setcwsw(&cwsw); 207 208 return ((float) xy.e); 209 } 210 211 #if 0 212 /* 213 * another fmaf for x86: assumes return value will be left in 214 * long double (80-bit double extended) precision 215 */ 216 long double 217 __fmaf(float x, float y, float z) { 218 /* 219 * Note: This implementation assumes the rounding precision mode 220 * is set to the default, rounding to 64 bit precision. If this 221 * routine must work in non-default rounding precision modes, do 222 * the following instead: 223 * 224 * long double t; 225 * 226 * <set rp mode to round to 64 bit precision> 227 * t = x * y; 228 * <restore rp mode> 229 * return t + z; 230 * 231 * Note that the code to change rounding precision must not alter 232 * the exception masks or flags, since the product x * y may raise 233 * an invalid operation exception. 234 */ 235 return ((long double) x * y + z); 236 } 237 #endif 238 239 #else 240 #error Unknown architecture 241 #endif 242