1*25c28e83SPiotr Jasiukajtis /*
2*25c28e83SPiotr Jasiukajtis * CDDL HEADER START
3*25c28e83SPiotr Jasiukajtis *
4*25c28e83SPiotr Jasiukajtis * The contents of this file are subject to the terms of the
5*25c28e83SPiotr Jasiukajtis * Common Development and Distribution License (the "License").
6*25c28e83SPiotr Jasiukajtis * You may not use this file except in compliance with the License.
7*25c28e83SPiotr Jasiukajtis *
8*25c28e83SPiotr Jasiukajtis * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9*25c28e83SPiotr Jasiukajtis * or http://www.opensolaris.org/os/licensing.
10*25c28e83SPiotr Jasiukajtis * See the License for the specific language governing permissions
11*25c28e83SPiotr Jasiukajtis * and limitations under the License.
12*25c28e83SPiotr Jasiukajtis *
13*25c28e83SPiotr Jasiukajtis * When distributing Covered Code, include this CDDL HEADER in each
14*25c28e83SPiotr Jasiukajtis * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15*25c28e83SPiotr Jasiukajtis * If applicable, add the following below this CDDL HEADER, with the
16*25c28e83SPiotr Jasiukajtis * fields enclosed by brackets "[]" replaced with your own identifying
17*25c28e83SPiotr Jasiukajtis * information: Portions Copyright [yyyy] [name of copyright owner]
18*25c28e83SPiotr Jasiukajtis *
19*25c28e83SPiotr Jasiukajtis * CDDL HEADER END
20*25c28e83SPiotr Jasiukajtis */
21*25c28e83SPiotr Jasiukajtis
22*25c28e83SPiotr Jasiukajtis /*
23*25c28e83SPiotr Jasiukajtis * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
24*25c28e83SPiotr Jasiukajtis */
25*25c28e83SPiotr Jasiukajtis /*
26*25c28e83SPiotr Jasiukajtis * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
27*25c28e83SPiotr Jasiukajtis * Use is subject to license terms.
28*25c28e83SPiotr Jasiukajtis */
29*25c28e83SPiotr Jasiukajtis
30*25c28e83SPiotr Jasiukajtis #pragma weak fmaf = __fmaf
31*25c28e83SPiotr Jasiukajtis
32*25c28e83SPiotr Jasiukajtis #include "libm.h"
33*25c28e83SPiotr Jasiukajtis #include "fma.h"
34*25c28e83SPiotr Jasiukajtis #include "fenv_inlines.h"
35*25c28e83SPiotr Jasiukajtis
36*25c28e83SPiotr Jasiukajtis #if defined(__sparc)
37*25c28e83SPiotr Jasiukajtis
38*25c28e83SPiotr Jasiukajtis /*
39*25c28e83SPiotr Jasiukajtis * fmaf for SPARC: 32-bit single precision, big-endian
40*25c28e83SPiotr Jasiukajtis */
41*25c28e83SPiotr Jasiukajtis float
__fmaf(float x,float y,float z)42*25c28e83SPiotr Jasiukajtis __fmaf(float x, float y, float z) {
43*25c28e83SPiotr Jasiukajtis union {
44*25c28e83SPiotr Jasiukajtis unsigned i[2];
45*25c28e83SPiotr Jasiukajtis double d;
46*25c28e83SPiotr Jasiukajtis } xy, zz;
47*25c28e83SPiotr Jasiukajtis unsigned u, s;
48*25c28e83SPiotr Jasiukajtis int exy, ez;
49*25c28e83SPiotr Jasiukajtis
50*25c28e83SPiotr Jasiukajtis /*
51*25c28e83SPiotr Jasiukajtis * the following operations can only raise the invalid exception,
52*25c28e83SPiotr Jasiukajtis * and then only if either x*y is of the form Inf*0 or one of x,
53*25c28e83SPiotr Jasiukajtis * y, or z is a signaling NaN
54*25c28e83SPiotr Jasiukajtis */
55*25c28e83SPiotr Jasiukajtis xy.d = (double) x * y;
56*25c28e83SPiotr Jasiukajtis zz.d = (double) z;
57*25c28e83SPiotr Jasiukajtis
58*25c28e83SPiotr Jasiukajtis /*
59*25c28e83SPiotr Jasiukajtis * if the sum xy + z will be exact, just compute it and cast the
60*25c28e83SPiotr Jasiukajtis * result to float
61*25c28e83SPiotr Jasiukajtis */
62*25c28e83SPiotr Jasiukajtis exy = (xy.i[0] >> 20) & 0x7ff;
63*25c28e83SPiotr Jasiukajtis ez = (zz.i[0] >> 20) & 0x7ff;
64*25c28e83SPiotr Jasiukajtis if ((ez - exy <= 4 && exy - ez <= 28) || exy == 0x7ff || exy == 0 ||
65*25c28e83SPiotr Jasiukajtis ez == 0x7ff || ez == 0) {
66*25c28e83SPiotr Jasiukajtis return ((float) (xy.d + zz.d));
67*25c28e83SPiotr Jasiukajtis }
68*25c28e83SPiotr Jasiukajtis
69*25c28e83SPiotr Jasiukajtis /*
70*25c28e83SPiotr Jasiukajtis * collapse the tail of the smaller summand into a "sticky bit"
71*25c28e83SPiotr Jasiukajtis * so that the sum can be computed without error
72*25c28e83SPiotr Jasiukajtis */
73*25c28e83SPiotr Jasiukajtis if (ez > exy) {
74*25c28e83SPiotr Jasiukajtis if (ez - exy < 31) {
75*25c28e83SPiotr Jasiukajtis u = xy.i[1];
76*25c28e83SPiotr Jasiukajtis s = 2 << (ez - exy);
77*25c28e83SPiotr Jasiukajtis if (u & (s - 1))
78*25c28e83SPiotr Jasiukajtis u |= s;
79*25c28e83SPiotr Jasiukajtis xy.i[1] = u & ~(s - 1);
80*25c28e83SPiotr Jasiukajtis } else if (ez - exy < 51) {
81*25c28e83SPiotr Jasiukajtis u = xy.i[0];
82*25c28e83SPiotr Jasiukajtis s = 1 << (ez - exy - 31);
83*25c28e83SPiotr Jasiukajtis if ((u & (s - 1)) | xy.i[1])
84*25c28e83SPiotr Jasiukajtis u |= s;
85*25c28e83SPiotr Jasiukajtis xy.i[0] = u & ~(s - 1);
86*25c28e83SPiotr Jasiukajtis xy.i[1] = 0;
87*25c28e83SPiotr Jasiukajtis } else {
88*25c28e83SPiotr Jasiukajtis /* collapse all of xy into a single bit */
89*25c28e83SPiotr Jasiukajtis xy.i[0] = (xy.i[0] & 0x80000000) | ((ez - 51) << 20);
90*25c28e83SPiotr Jasiukajtis xy.i[1] = 0;
91*25c28e83SPiotr Jasiukajtis }
92*25c28e83SPiotr Jasiukajtis } else {
93*25c28e83SPiotr Jasiukajtis if (exy - ez < 31) {
94*25c28e83SPiotr Jasiukajtis u = zz.i[1];
95*25c28e83SPiotr Jasiukajtis s = 2 << (exy - ez);
96*25c28e83SPiotr Jasiukajtis if (u & (s - 1))
97*25c28e83SPiotr Jasiukajtis u |= s;
98*25c28e83SPiotr Jasiukajtis zz.i[1] = u & ~(s - 1);
99*25c28e83SPiotr Jasiukajtis } else if (exy - ez < 51) {
100*25c28e83SPiotr Jasiukajtis u = zz.i[0];
101*25c28e83SPiotr Jasiukajtis s = 1 << (exy - ez - 31);
102*25c28e83SPiotr Jasiukajtis if ((u & (s - 1)) | zz.i[1])
103*25c28e83SPiotr Jasiukajtis u |= s;
104*25c28e83SPiotr Jasiukajtis zz.i[0] = u & ~(s - 1);
105*25c28e83SPiotr Jasiukajtis zz.i[1] = 0;
106*25c28e83SPiotr Jasiukajtis } else {
107*25c28e83SPiotr Jasiukajtis /* collapse all of zz into a single bit */
108*25c28e83SPiotr Jasiukajtis zz.i[0] = (zz.i[0] & 0x80000000) | ((exy - 51) << 20);
109*25c28e83SPiotr Jasiukajtis zz.i[1] = 0;
110*25c28e83SPiotr Jasiukajtis }
111*25c28e83SPiotr Jasiukajtis }
112*25c28e83SPiotr Jasiukajtis
113*25c28e83SPiotr Jasiukajtis return ((float) (xy.d + zz.d));
114*25c28e83SPiotr Jasiukajtis }
115*25c28e83SPiotr Jasiukajtis
116*25c28e83SPiotr Jasiukajtis #elif defined(__x86)
117*25c28e83SPiotr Jasiukajtis
118*25c28e83SPiotr Jasiukajtis #if defined(__amd64)
119*25c28e83SPiotr Jasiukajtis #define NI 4
120*25c28e83SPiotr Jasiukajtis #else
121*25c28e83SPiotr Jasiukajtis #define NI 3
122*25c28e83SPiotr Jasiukajtis #endif
123*25c28e83SPiotr Jasiukajtis
124*25c28e83SPiotr Jasiukajtis /*
125*25c28e83SPiotr Jasiukajtis * fmaf for x86: 32-bit single precision, little-endian
126*25c28e83SPiotr Jasiukajtis */
127*25c28e83SPiotr Jasiukajtis float
__fmaf(float x,float y,float z)128*25c28e83SPiotr Jasiukajtis __fmaf(float x, float y, float z) {
129*25c28e83SPiotr Jasiukajtis union {
130*25c28e83SPiotr Jasiukajtis unsigned i[NI];
131*25c28e83SPiotr Jasiukajtis long double e;
132*25c28e83SPiotr Jasiukajtis } xy, zz;
133*25c28e83SPiotr Jasiukajtis unsigned u, s, cwsw, oldcwsw;
134*25c28e83SPiotr Jasiukajtis int exy, ez;
135*25c28e83SPiotr Jasiukajtis
136*25c28e83SPiotr Jasiukajtis /* set rounding precision to 64 bits */
137*25c28e83SPiotr Jasiukajtis __fenv_getcwsw(&oldcwsw);
138*25c28e83SPiotr Jasiukajtis cwsw = (oldcwsw & 0xfcffffff) | 0x03000000;
139*25c28e83SPiotr Jasiukajtis __fenv_setcwsw(&cwsw);
140*25c28e83SPiotr Jasiukajtis
141*25c28e83SPiotr Jasiukajtis /*
142*25c28e83SPiotr Jasiukajtis * the following operations can only raise the invalid exception,
143*25c28e83SPiotr Jasiukajtis * and then only if either x*y is of the form Inf*0 or one of x,
144*25c28e83SPiotr Jasiukajtis * y, or z is a signaling NaN
145*25c28e83SPiotr Jasiukajtis */
146*25c28e83SPiotr Jasiukajtis xy.e = (long double) x * y;
147*25c28e83SPiotr Jasiukajtis zz.e = (long double) z;
148*25c28e83SPiotr Jasiukajtis
149*25c28e83SPiotr Jasiukajtis /*
150*25c28e83SPiotr Jasiukajtis * if the sum xy + z will be exact, just compute it and cast the
151*25c28e83SPiotr Jasiukajtis * result to float
152*25c28e83SPiotr Jasiukajtis */
153*25c28e83SPiotr Jasiukajtis exy = xy.i[2] & 0x7fff;
154*25c28e83SPiotr Jasiukajtis ez = zz.i[2] & 0x7fff;
155*25c28e83SPiotr Jasiukajtis if ((ez - exy <= 15 && exy - ez <= 39) || exy == 0x7fff || exy == 0 ||
156*25c28e83SPiotr Jasiukajtis ez == 0x7fff || ez == 0) {
157*25c28e83SPiotr Jasiukajtis goto cont;
158*25c28e83SPiotr Jasiukajtis }
159*25c28e83SPiotr Jasiukajtis
160*25c28e83SPiotr Jasiukajtis /*
161*25c28e83SPiotr Jasiukajtis * collapse the tail of the smaller summand into a "sticky bit"
162*25c28e83SPiotr Jasiukajtis * so that the sum can be computed without error
163*25c28e83SPiotr Jasiukajtis */
164*25c28e83SPiotr Jasiukajtis if (ez > exy) {
165*25c28e83SPiotr Jasiukajtis if (ez - exy < 31) {
166*25c28e83SPiotr Jasiukajtis u = xy.i[0];
167*25c28e83SPiotr Jasiukajtis s = 2 << (ez - exy);
168*25c28e83SPiotr Jasiukajtis if (u & (s - 1))
169*25c28e83SPiotr Jasiukajtis u |= s;
170*25c28e83SPiotr Jasiukajtis xy.i[0] = u & ~(s - 1);
171*25c28e83SPiotr Jasiukajtis } else if (ez - exy < 62) {
172*25c28e83SPiotr Jasiukajtis u = xy.i[1];
173*25c28e83SPiotr Jasiukajtis s = 1 << (ez - exy - 31);
174*25c28e83SPiotr Jasiukajtis if ((u & (s - 1)) | xy.i[0])
175*25c28e83SPiotr Jasiukajtis u |= s;
176*25c28e83SPiotr Jasiukajtis xy.i[1] = u & ~(s - 1);
177*25c28e83SPiotr Jasiukajtis xy.i[0] = 0;
178*25c28e83SPiotr Jasiukajtis } else {
179*25c28e83SPiotr Jasiukajtis /* collapse all of xy into a single bit */
180*25c28e83SPiotr Jasiukajtis xy.i[0] = 0;
181*25c28e83SPiotr Jasiukajtis xy.i[1] = 0x80000000;
182*25c28e83SPiotr Jasiukajtis xy.i[2] = (xy.i[2] & 0x8000) | (ez - 62);
183*25c28e83SPiotr Jasiukajtis }
184*25c28e83SPiotr Jasiukajtis } else {
185*25c28e83SPiotr Jasiukajtis if (exy - ez < 62) {
186*25c28e83SPiotr Jasiukajtis u = zz.i[1];
187*25c28e83SPiotr Jasiukajtis s = 1 << (exy - ez - 31);
188*25c28e83SPiotr Jasiukajtis if ((u & (s - 1)) | zz.i[0])
189*25c28e83SPiotr Jasiukajtis u |= s;
190*25c28e83SPiotr Jasiukajtis zz.i[1] = u & ~(s - 1);
191*25c28e83SPiotr Jasiukajtis zz.i[0] = 0;
192*25c28e83SPiotr Jasiukajtis } else {
193*25c28e83SPiotr Jasiukajtis /* collapse all of zz into a single bit */
194*25c28e83SPiotr Jasiukajtis zz.i[0] = 0;
195*25c28e83SPiotr Jasiukajtis zz.i[1] = 0x80000000;
196*25c28e83SPiotr Jasiukajtis zz.i[2] = (zz.i[2] & 0x8000) | (exy - 62);
197*25c28e83SPiotr Jasiukajtis }
198*25c28e83SPiotr Jasiukajtis }
199*25c28e83SPiotr Jasiukajtis
200*25c28e83SPiotr Jasiukajtis cont:
201*25c28e83SPiotr Jasiukajtis xy.e += zz.e;
202*25c28e83SPiotr Jasiukajtis
203*25c28e83SPiotr Jasiukajtis /* restore the rounding precision */
204*25c28e83SPiotr Jasiukajtis __fenv_getcwsw(&cwsw);
205*25c28e83SPiotr Jasiukajtis cwsw = (cwsw & 0xfcffffff) | (oldcwsw & 0x03000000);
206*25c28e83SPiotr Jasiukajtis __fenv_setcwsw(&cwsw);
207*25c28e83SPiotr Jasiukajtis
208*25c28e83SPiotr Jasiukajtis return ((float) xy.e);
209*25c28e83SPiotr Jasiukajtis }
210*25c28e83SPiotr Jasiukajtis
211*25c28e83SPiotr Jasiukajtis #if 0
212*25c28e83SPiotr Jasiukajtis /*
213*25c28e83SPiotr Jasiukajtis * another fmaf for x86: assumes return value will be left in
214*25c28e83SPiotr Jasiukajtis * long double (80-bit double extended) precision
215*25c28e83SPiotr Jasiukajtis */
216*25c28e83SPiotr Jasiukajtis long double
217*25c28e83SPiotr Jasiukajtis __fmaf(float x, float y, float z) {
218*25c28e83SPiotr Jasiukajtis /*
219*25c28e83SPiotr Jasiukajtis * Note: This implementation assumes the rounding precision mode
220*25c28e83SPiotr Jasiukajtis * is set to the default, rounding to 64 bit precision. If this
221*25c28e83SPiotr Jasiukajtis * routine must work in non-default rounding precision modes, do
222*25c28e83SPiotr Jasiukajtis * the following instead:
223*25c28e83SPiotr Jasiukajtis *
224*25c28e83SPiotr Jasiukajtis * long double t;
225*25c28e83SPiotr Jasiukajtis *
226*25c28e83SPiotr Jasiukajtis * <set rp mode to round to 64 bit precision>
227*25c28e83SPiotr Jasiukajtis * t = x * y;
228*25c28e83SPiotr Jasiukajtis * <restore rp mode>
229*25c28e83SPiotr Jasiukajtis * return t + z;
230*25c28e83SPiotr Jasiukajtis *
231*25c28e83SPiotr Jasiukajtis * Note that the code to change rounding precision must not alter
232*25c28e83SPiotr Jasiukajtis * the exception masks or flags, since the product x * y may raise
233*25c28e83SPiotr Jasiukajtis * an invalid operation exception.
234*25c28e83SPiotr Jasiukajtis */
235*25c28e83SPiotr Jasiukajtis return ((long double) x * y + z);
236*25c28e83SPiotr Jasiukajtis }
237*25c28e83SPiotr Jasiukajtis #endif
238*25c28e83SPiotr Jasiukajtis
239*25c28e83SPiotr Jasiukajtis #else
240*25c28e83SPiotr Jasiukajtis #error Unknown architecture
241*25c28e83SPiotr Jasiukajtis #endif
242