xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/sve/powf.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Single-precision SVE powf function.
3*f3087befSAndrew Turner  *
4*f3087befSAndrew Turner  * Copyright (c) 2023-2025, Arm Limited.
5*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner  */
7*f3087befSAndrew Turner 
8*f3087befSAndrew Turner #include "sv_math.h"
9*f3087befSAndrew Turner #include "test_sig.h"
10*f3087befSAndrew Turner #include "test_defs.h"
11*f3087befSAndrew Turner 
12*f3087befSAndrew Turner /* The following data is used in the SVE pow core computation
13*f3087befSAndrew Turner    and special case detection.  */
14*f3087befSAndrew Turner #define Tinvc __v_powf_data.invc
15*f3087befSAndrew Turner #define Tlogc __v_powf_data.logc
16*f3087befSAndrew Turner #define Texp __v_powf_data.scale
17*f3087befSAndrew Turner #define SignBias (1 << (V_POWF_EXP2_TABLE_BITS + 11))
18*f3087befSAndrew Turner #define Norm 0x1p23f /* 0x4b000000.  */
19*f3087befSAndrew Turner 
20*f3087befSAndrew Turner /* Overall ULP error bound for pow is 2.6 ulp
21*f3087befSAndrew Turner    ~ 0.5 + 2^24 (128*Ln2*relerr_log2 + relerr_exp2).  */
22*f3087befSAndrew Turner static const struct data
23*f3087befSAndrew Turner {
24*f3087befSAndrew Turner   double log_poly[4];
25*f3087befSAndrew Turner   double exp_poly[3];
26*f3087befSAndrew Turner   float uflow_bound, oflow_bound, small_bound;
27*f3087befSAndrew Turner   uint32_t sign_bias, subnormal_bias, off;
28*f3087befSAndrew Turner } data = {
29*f3087befSAndrew Turner   /* rel err: 1.5 * 2^-30. Each coefficients is multiplied the value of
30*f3087befSAndrew Turner      V_POWF_EXP2_N.  */
31*f3087befSAndrew Turner   .log_poly = { -0x1.6ff5daa3b3d7cp+3, 0x1.ec81d03c01aebp+3,
32*f3087befSAndrew Turner 		-0x1.71547bb43f101p+4, 0x1.7154764a815cbp+5 },
33*f3087befSAndrew Turner   /* rel err: 1.69 * 2^-34.  */
34*f3087befSAndrew Turner   .exp_poly = {
35*f3087befSAndrew Turner     0x1.c6af84b912394p-20, /* A0 / V_POWF_EXP2_N^3.  */
36*f3087befSAndrew Turner     0x1.ebfce50fac4f3p-13, /* A1 / V_POWF_EXP2_N^2.  */
37*f3087befSAndrew Turner     0x1.62e42ff0c52d6p-6,   /* A3 / V_POWF_EXP2_N.  */
38*f3087befSAndrew Turner   },
39*f3087befSAndrew Turner   .uflow_bound = -0x1.2cp+12f, /* -150.0 * V_POWF_EXP2_N.  */
40*f3087befSAndrew Turner   .oflow_bound = 0x1p+12f, /* 128.0 * V_POWF_EXP2_N.  */
41*f3087befSAndrew Turner   .small_bound = 0x1p-126f,
42*f3087befSAndrew Turner   .off = 0x3f35d000,
43*f3087befSAndrew Turner   .sign_bias = SignBias,
44*f3087befSAndrew Turner   .subnormal_bias = 0x0b800000, /* 23 << 23.  */
45*f3087befSAndrew Turner };
46*f3087befSAndrew Turner 
47*f3087befSAndrew Turner #define A(i) sv_f64 (d->log_poly[i])
48*f3087befSAndrew Turner #define C(i) sv_f64 (d->exp_poly[i])
49*f3087befSAndrew Turner 
50*f3087befSAndrew Turner /* Check if x is an integer.  */
51*f3087befSAndrew Turner static inline svbool_t
svisint(svbool_t pg,svfloat32_t x)52*f3087befSAndrew Turner svisint (svbool_t pg, svfloat32_t x)
53*f3087befSAndrew Turner {
54*f3087befSAndrew Turner   return svcmpeq (pg, svrintz_z (pg, x), x);
55*f3087befSAndrew Turner }
56*f3087befSAndrew Turner 
57*f3087befSAndrew Turner /* Check if x is real not integer valued.  */
58*f3087befSAndrew Turner static inline svbool_t
svisnotint(svbool_t pg,svfloat32_t x)59*f3087befSAndrew Turner svisnotint (svbool_t pg, svfloat32_t x)
60*f3087befSAndrew Turner {
61*f3087befSAndrew Turner   return svcmpne (pg, svrintz_z (pg, x), x);
62*f3087befSAndrew Turner }
63*f3087befSAndrew Turner 
64*f3087befSAndrew Turner /* Check if x is an odd integer.  */
65*f3087befSAndrew Turner static inline svbool_t
svisodd(svbool_t pg,svfloat32_t x)66*f3087befSAndrew Turner svisodd (svbool_t pg, svfloat32_t x)
67*f3087befSAndrew Turner {
68*f3087befSAndrew Turner   svfloat32_t y = svmul_x (pg, x, 0.5f);
69*f3087befSAndrew Turner   return svisnotint (pg, y);
70*f3087befSAndrew Turner }
71*f3087befSAndrew Turner 
72*f3087befSAndrew Turner /* Check if zero, inf or nan.  */
73*f3087befSAndrew Turner static inline svbool_t
sv_zeroinfnan(svbool_t pg,svuint32_t i)74*f3087befSAndrew Turner sv_zeroinfnan (svbool_t pg, svuint32_t i)
75*f3087befSAndrew Turner {
76*f3087befSAndrew Turner   return svcmpge (pg, svsub_x (pg, svadd_x (pg, i, i), 1),
77*f3087befSAndrew Turner 		  2u * 0x7f800000 - 1);
78*f3087befSAndrew Turner }
79*f3087befSAndrew Turner 
80*f3087befSAndrew Turner /* Returns 0 if not int, 1 if odd int, 2 if even int.  The argument is
81*f3087befSAndrew Turner    the bit representation of a non-zero finite floating-point value.  */
82*f3087befSAndrew Turner static inline int
checkint(uint32_t iy)83*f3087befSAndrew Turner checkint (uint32_t iy)
84*f3087befSAndrew Turner {
85*f3087befSAndrew Turner   int e = iy >> 23 & 0xff;
86*f3087befSAndrew Turner   if (e < 0x7f)
87*f3087befSAndrew Turner     return 0;
88*f3087befSAndrew Turner   if (e > 0x7f + 23)
89*f3087befSAndrew Turner     return 2;
90*f3087befSAndrew Turner   if (iy & ((1 << (0x7f + 23 - e)) - 1))
91*f3087befSAndrew Turner     return 0;
92*f3087befSAndrew Turner   if (iy & (1 << (0x7f + 23 - e)))
93*f3087befSAndrew Turner     return 1;
94*f3087befSAndrew Turner   return 2;
95*f3087befSAndrew Turner }
96*f3087befSAndrew Turner 
97*f3087befSAndrew Turner /* Check if zero, inf or nan.  */
98*f3087befSAndrew Turner static inline int
zeroinfnan(uint32_t ix)99*f3087befSAndrew Turner zeroinfnan (uint32_t ix)
100*f3087befSAndrew Turner {
101*f3087befSAndrew Turner   return 2 * ix - 1 >= 2u * 0x7f800000 - 1;
102*f3087befSAndrew Turner }
103*f3087befSAndrew Turner 
104*f3087befSAndrew Turner /* A scalar subroutine used to fix main power special cases. Similar to the
105*f3087befSAndrew Turner    preamble of scalar powf except that we do not update ix and sign_bias. This
106*f3087befSAndrew Turner    is done in the preamble of the SVE powf.  */
107*f3087befSAndrew Turner static inline float
powf_specialcase(float x,float y,float z)108*f3087befSAndrew Turner powf_specialcase (float x, float y, float z)
109*f3087befSAndrew Turner {
110*f3087befSAndrew Turner   uint32_t ix = asuint (x);
111*f3087befSAndrew Turner   uint32_t iy = asuint (y);
112*f3087befSAndrew Turner   /* Either (x < 0x1p-126 or inf or nan) or (y is 0 or inf or nan).  */
113*f3087befSAndrew Turner   if (unlikely (zeroinfnan (iy)))
114*f3087befSAndrew Turner     {
115*f3087befSAndrew Turner       if (2 * iy == 0)
116*f3087befSAndrew Turner 	return issignalingf_inline (x) ? x + y : 1.0f;
117*f3087befSAndrew Turner       if (ix == 0x3f800000)
118*f3087befSAndrew Turner 	return issignalingf_inline (y) ? x + y : 1.0f;
119*f3087befSAndrew Turner       if (2 * ix > 2u * 0x7f800000 || 2 * iy > 2u * 0x7f800000)
120*f3087befSAndrew Turner 	return x + y;
121*f3087befSAndrew Turner       if (2 * ix == 2 * 0x3f800000)
122*f3087befSAndrew Turner 	return 1.0f;
123*f3087befSAndrew Turner       if ((2 * ix < 2 * 0x3f800000) == !(iy & 0x80000000))
124*f3087befSAndrew Turner 	return 0.0f; /* |x|<1 && y==inf or |x|>1 && y==-inf.  */
125*f3087befSAndrew Turner       return y * y;
126*f3087befSAndrew Turner     }
127*f3087befSAndrew Turner   if (unlikely (zeroinfnan (ix)))
128*f3087befSAndrew Turner     {
129*f3087befSAndrew Turner       float_t x2 = x * x;
130*f3087befSAndrew Turner       if (ix & 0x80000000 && checkint (iy) == 1)
131*f3087befSAndrew Turner 	x2 = -x2;
132*f3087befSAndrew Turner       return iy & 0x80000000 ? 1 / x2 : x2;
133*f3087befSAndrew Turner     }
134*f3087befSAndrew Turner   /* We need a return here in case x<0 and y is integer, but all other tests
135*f3087befSAndrew Turner    need to be run.  */
136*f3087befSAndrew Turner   return z;
137*f3087befSAndrew Turner }
138*f3087befSAndrew Turner 
139*f3087befSAndrew Turner /* Scalar fallback for special case routines with custom signature.  */
140*f3087befSAndrew Turner static svfloat32_t NOINLINE
sv_call_powf_sc(svfloat32_t x1,svfloat32_t x2,svfloat32_t y)141*f3087befSAndrew Turner sv_call_powf_sc (svfloat32_t x1, svfloat32_t x2, svfloat32_t y)
142*f3087befSAndrew Turner {
143*f3087befSAndrew Turner   /* Special cases of x or y: zero, inf and nan.  */
144*f3087befSAndrew Turner   svbool_t xspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x1));
145*f3087befSAndrew Turner   svbool_t yspecial = sv_zeroinfnan (svptrue_b32 (), svreinterpret_u32 (x2));
146*f3087befSAndrew Turner   svbool_t cmp = svorr_z (svptrue_b32 (), xspecial, yspecial);
147*f3087befSAndrew Turner 
148*f3087befSAndrew Turner   svbool_t p = svpfirst (cmp, svpfalse ());
149*f3087befSAndrew Turner   while (svptest_any (cmp, p))
150*f3087befSAndrew Turner     {
151*f3087befSAndrew Turner       float sx1 = svclastb (p, 0, x1);
152*f3087befSAndrew Turner       float sx2 = svclastb (p, 0, x2);
153*f3087befSAndrew Turner       float elem = svclastb (p, 0, y);
154*f3087befSAndrew Turner       elem = powf_specialcase (sx1, sx2, elem);
155*f3087befSAndrew Turner       svfloat32_t y2 = sv_f32 (elem);
156*f3087befSAndrew Turner       y = svsel (p, y2, y);
157*f3087befSAndrew Turner       p = svpnext_b32 (cmp, p);
158*f3087befSAndrew Turner     }
159*f3087befSAndrew Turner   return y;
160*f3087befSAndrew Turner }
161*f3087befSAndrew Turner 
162*f3087befSAndrew Turner /* Compute core for half of the lanes in double precision.  */
163*f3087befSAndrew Turner static inline svfloat64_t
sv_powf_core_ext(const svbool_t pg,svuint64_t i,svfloat64_t z,svint64_t k,svfloat64_t y,svuint64_t sign_bias,svfloat64_t * pylogx,const struct data * d)164*f3087befSAndrew Turner sv_powf_core_ext (const svbool_t pg, svuint64_t i, svfloat64_t z, svint64_t k,
165*f3087befSAndrew Turner 		  svfloat64_t y, svuint64_t sign_bias, svfloat64_t *pylogx,
166*f3087befSAndrew Turner 		  const struct data *d)
167*f3087befSAndrew Turner {
168*f3087befSAndrew Turner   svfloat64_t invc = svld1_gather_index (pg, Tinvc, i);
169*f3087befSAndrew Turner   svfloat64_t logc = svld1_gather_index (pg, Tlogc, i);
170*f3087befSAndrew Turner 
171*f3087befSAndrew Turner   /* log2(x) = log1p(z/c-1)/ln2 + log2(c) + k.  */
172*f3087befSAndrew Turner   svfloat64_t r = svmla_x (pg, sv_f64 (-1.0), z, invc);
173*f3087befSAndrew Turner   svfloat64_t y0 = svadd_x (pg, logc, svcvt_f64_x (pg, k));
174*f3087befSAndrew Turner 
175*f3087befSAndrew Turner   /* Polynomial to approximate log1p(r)/ln2.  */
176*f3087befSAndrew Turner   svfloat64_t logx = A (0);
177*f3087befSAndrew Turner   logx = svmad_x (pg, r, logx, A (1));
178*f3087befSAndrew Turner   logx = svmad_x (pg, r, logx, A (2));
179*f3087befSAndrew Turner   logx = svmad_x (pg, r, logx, A (3));
180*f3087befSAndrew Turner   logx = svmad_x (pg, r, logx, y0);
181*f3087befSAndrew Turner   *pylogx = svmul_x (pg, y, logx);
182*f3087befSAndrew Turner 
183*f3087befSAndrew Turner   /* z - kd is in [-1, 1] in non-nearest rounding modes.  */
184*f3087befSAndrew Turner   svfloat64_t kd = svrinta_x (svptrue_b64 (), *pylogx);
185*f3087befSAndrew Turner   svuint64_t ki = svreinterpret_u64 (svcvt_s64_x (svptrue_b64 (), kd));
186*f3087befSAndrew Turner 
187*f3087befSAndrew Turner   r = svsub_x (pg, *pylogx, kd);
188*f3087befSAndrew Turner 
189*f3087befSAndrew Turner   /* exp2(x) = 2^(k/N) * 2^r ~= s * (C0*r^3 + C1*r^2 + C2*r + 1).  */
190*f3087befSAndrew Turner   svuint64_t t = svld1_gather_index (
191*f3087befSAndrew Turner       svptrue_b64 (), Texp, svand_x (svptrue_b64 (), ki, V_POWF_EXP2_N - 1));
192*f3087befSAndrew Turner   svuint64_t ski = svadd_x (svptrue_b64 (), ki, sign_bias);
193*f3087befSAndrew Turner   t = svadd_x (svptrue_b64 (), t,
194*f3087befSAndrew Turner 	       svlsl_x (svptrue_b64 (), ski, 52 - V_POWF_EXP2_TABLE_BITS));
195*f3087befSAndrew Turner   svfloat64_t s = svreinterpret_f64 (t);
196*f3087befSAndrew Turner 
197*f3087befSAndrew Turner   svfloat64_t p = C (0);
198*f3087befSAndrew Turner   p = svmla_x (pg, C (1), p, r);
199*f3087befSAndrew Turner   p = svmla_x (pg, C (2), p, r);
200*f3087befSAndrew Turner   p = svmla_x (pg, s, p, svmul_x (svptrue_b64 (), s, r));
201*f3087befSAndrew Turner 
202*f3087befSAndrew Turner   return p;
203*f3087befSAndrew Turner }
204*f3087befSAndrew Turner 
205*f3087befSAndrew Turner /* Widen vector to double precision and compute core on both halves of the
206*f3087befSAndrew Turner    vector. Lower cost of promotion by considering all lanes active.  */
207*f3087befSAndrew Turner static inline svfloat32_t
sv_powf_core(const svbool_t pg,svuint32_t i,svuint32_t iz,svint32_t k,svfloat32_t y,svuint32_t sign_bias,svfloat32_t * pylogx,const struct data * d)208*f3087befSAndrew Turner sv_powf_core (const svbool_t pg, svuint32_t i, svuint32_t iz, svint32_t k,
209*f3087befSAndrew Turner 	      svfloat32_t y, svuint32_t sign_bias, svfloat32_t *pylogx,
210*f3087befSAndrew Turner 	      const struct data *d)
211*f3087befSAndrew Turner {
212*f3087befSAndrew Turner   const svbool_t ptrue = svptrue_b64 ();
213*f3087befSAndrew Turner 
214*f3087befSAndrew Turner   /* Unpack and promote input vectors (pg, y, z, i, k and sign_bias) into two
215*f3087befSAndrew Turner      in order to perform core computation in double precision.  */
216*f3087befSAndrew Turner   const svbool_t pg_lo = svunpklo (pg);
217*f3087befSAndrew Turner   const svbool_t pg_hi = svunpkhi (pg);
218*f3087befSAndrew Turner   svfloat64_t y_lo
219*f3087befSAndrew Turner       = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (svreinterpret_u32 (y))));
220*f3087befSAndrew Turner   svfloat64_t y_hi
221*f3087befSAndrew Turner       = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (svreinterpret_u32 (y))));
222*f3087befSAndrew Turner   svfloat64_t z_lo = svcvt_f64_x (pg, svreinterpret_f32 (svunpklo (iz)));
223*f3087befSAndrew Turner   svfloat64_t z_hi = svcvt_f64_x (pg, svreinterpret_f32 (svunpkhi (iz)));
224*f3087befSAndrew Turner   svuint64_t i_lo = svunpklo (i);
225*f3087befSAndrew Turner   svuint64_t i_hi = svunpkhi (i);
226*f3087befSAndrew Turner   svint64_t k_lo = svunpklo (k);
227*f3087befSAndrew Turner   svint64_t k_hi = svunpkhi (k);
228*f3087befSAndrew Turner   svuint64_t sign_bias_lo = svunpklo (sign_bias);
229*f3087befSAndrew Turner   svuint64_t sign_bias_hi = svunpkhi (sign_bias);
230*f3087befSAndrew Turner 
231*f3087befSAndrew Turner   /* Compute each part in double precision.  */
232*f3087befSAndrew Turner   svfloat64_t ylogx_lo, ylogx_hi;
233*f3087befSAndrew Turner   svfloat64_t lo = sv_powf_core_ext (pg_lo, i_lo, z_lo, k_lo, y_lo,
234*f3087befSAndrew Turner 				     sign_bias_lo, &ylogx_lo, d);
235*f3087befSAndrew Turner   svfloat64_t hi = sv_powf_core_ext (pg_hi, i_hi, z_hi, k_hi, y_hi,
236*f3087befSAndrew Turner 				     sign_bias_hi, &ylogx_hi, d);
237*f3087befSAndrew Turner 
238*f3087befSAndrew Turner   /* Convert back to single-precision and interleave.  */
239*f3087befSAndrew Turner   svfloat32_t ylogx_lo_32 = svcvt_f32_x (ptrue, ylogx_lo);
240*f3087befSAndrew Turner   svfloat32_t ylogx_hi_32 = svcvt_f32_x (ptrue, ylogx_hi);
241*f3087befSAndrew Turner   *pylogx = svuzp1 (ylogx_lo_32, ylogx_hi_32);
242*f3087befSAndrew Turner   svfloat32_t lo_32 = svcvt_f32_x (ptrue, lo);
243*f3087befSAndrew Turner   svfloat32_t hi_32 = svcvt_f32_x (ptrue, hi);
244*f3087befSAndrew Turner   return svuzp1 (lo_32, hi_32);
245*f3087befSAndrew Turner }
246*f3087befSAndrew Turner 
247*f3087befSAndrew Turner /* Implementation of SVE powf.
248*f3087befSAndrew Turner    Provides the same accuracy as AdvSIMD powf, since it relies on the same
249*f3087befSAndrew Turner    algorithm. The theoretical maximum error is under 2.60 ULPs.
250*f3087befSAndrew Turner    Maximum measured error is 2.57 ULPs:
251*f3087befSAndrew Turner    SV_NAME_F2 (pow) (0x1.031706p+0, 0x1.ce2ec2p+12) got 0x1.fff868p+127
252*f3087befSAndrew Turner 						   want 0x1.fff862p+127.  */
SV_NAME_F2(pow)253*f3087befSAndrew Turner svfloat32_t SV_NAME_F2 (pow) (svfloat32_t x, svfloat32_t y, const svbool_t pg)
254*f3087befSAndrew Turner {
255*f3087befSAndrew Turner   const struct data *d = ptr_barrier (&data);
256*f3087befSAndrew Turner 
257*f3087befSAndrew Turner   svuint32_t vix0 = svreinterpret_u32 (x);
258*f3087befSAndrew Turner   svuint32_t viy0 = svreinterpret_u32 (y);
259*f3087befSAndrew Turner 
260*f3087befSAndrew Turner   /* Negative x cases.  */
261*f3087befSAndrew Turner   svbool_t xisneg = svcmplt (pg, x, sv_f32 (0));
262*f3087befSAndrew Turner 
263*f3087befSAndrew Turner   /* Set sign_bias and ix depending on sign of x and nature of y.  */
264*f3087befSAndrew Turner   svbool_t yint_or_xpos = pg;
265*f3087befSAndrew Turner   svuint32_t sign_bias = sv_u32 (0);
266*f3087befSAndrew Turner   svuint32_t vix = vix0;
267*f3087befSAndrew Turner   if (unlikely (svptest_any (pg, xisneg)))
268*f3087befSAndrew Turner     {
269*f3087befSAndrew Turner       /* Determine nature of y.  */
270*f3087befSAndrew Turner       yint_or_xpos = svisint (xisneg, y);
271*f3087befSAndrew Turner       svbool_t yisodd_xisneg = svisodd (xisneg, y);
272*f3087befSAndrew Turner       /* ix set to abs(ix) if y is integer.  */
273*f3087befSAndrew Turner       vix = svand_m (yint_or_xpos, vix0, 0x7fffffff);
274*f3087befSAndrew Turner       /* Set to SignBias if x is negative and y is odd.  */
275*f3087befSAndrew Turner       sign_bias = svsel (yisodd_xisneg, sv_u32 (d->sign_bias), sv_u32 (0));
276*f3087befSAndrew Turner     }
277*f3087befSAndrew Turner 
278*f3087befSAndrew Turner   /* Special cases of x or y: zero, inf and nan.  */
279*f3087befSAndrew Turner   svbool_t xspecial = sv_zeroinfnan (pg, vix0);
280*f3087befSAndrew Turner   svbool_t yspecial = sv_zeroinfnan (pg, viy0);
281*f3087befSAndrew Turner   svbool_t cmp = svorr_z (pg, xspecial, yspecial);
282*f3087befSAndrew Turner 
283*f3087befSAndrew Turner   /* Small cases of x: |x| < 0x1p-126.  */
284*f3087befSAndrew Turner   svbool_t xsmall = svaclt (yint_or_xpos, x, d->small_bound);
285*f3087befSAndrew Turner   if (unlikely (svptest_any (yint_or_xpos, xsmall)))
286*f3087befSAndrew Turner     {
287*f3087befSAndrew Turner       /* Normalize subnormal x so exponent becomes negative.  */
288*f3087befSAndrew Turner       svuint32_t vix_norm = svreinterpret_u32 (svmul_x (xsmall, x, Norm));
289*f3087befSAndrew Turner       vix_norm = svand_x (xsmall, vix_norm, 0x7fffffff);
290*f3087befSAndrew Turner       vix_norm = svsub_x (xsmall, vix_norm, d->subnormal_bias);
291*f3087befSAndrew Turner       vix = svsel (xsmall, vix_norm, vix);
292*f3087befSAndrew Turner     }
293*f3087befSAndrew Turner   /* Part of core computation carried in working precision.  */
294*f3087befSAndrew Turner   svuint32_t tmp = svsub_x (yint_or_xpos, vix, d->off);
295*f3087befSAndrew Turner   svuint32_t i = svand_x (
296*f3087befSAndrew Turner       yint_or_xpos, svlsr_x (yint_or_xpos, tmp, (23 - V_POWF_LOG2_TABLE_BITS)),
297*f3087befSAndrew Turner       V_POWF_LOG2_N - 1);
298*f3087befSAndrew Turner   svuint32_t top = svand_x (yint_or_xpos, tmp, 0xff800000);
299*f3087befSAndrew Turner   svuint32_t iz = svsub_x (yint_or_xpos, vix, top);
300*f3087befSAndrew Turner   svint32_t k = svasr_x (yint_or_xpos, svreinterpret_s32 (top),
301*f3087befSAndrew Turner 			 (23 - V_POWF_EXP2_TABLE_BITS));
302*f3087befSAndrew Turner 
303*f3087befSAndrew Turner   /* Compute core in extended precision and return intermediate ylogx results
304*f3087befSAndrew Turner      to handle cases of underflow and underflow in exp.  */
305*f3087befSAndrew Turner   svfloat32_t ylogx;
306*f3087befSAndrew Turner   svfloat32_t ret
307*f3087befSAndrew Turner       = sv_powf_core (yint_or_xpos, i, iz, k, y, sign_bias, &ylogx, d);
308*f3087befSAndrew Turner 
309*f3087befSAndrew Turner   /* Handle exp special cases of underflow and overflow.  */
310*f3087befSAndrew Turner   svuint32_t sign
311*f3087befSAndrew Turner       = svlsl_x (yint_or_xpos, sign_bias, 20 - V_POWF_EXP2_TABLE_BITS);
312*f3087befSAndrew Turner   svfloat32_t ret_oflow
313*f3087befSAndrew Turner       = svreinterpret_f32 (svorr_x (yint_or_xpos, sign, asuint (INFINITY)));
314*f3087befSAndrew Turner   svfloat32_t ret_uflow = svreinterpret_f32 (sign);
315*f3087befSAndrew Turner   ret = svsel (svcmple (yint_or_xpos, ylogx, d->uflow_bound), ret_uflow, ret);
316*f3087befSAndrew Turner   ret = svsel (svcmpgt (yint_or_xpos, ylogx, d->oflow_bound), ret_oflow, ret);
317*f3087befSAndrew Turner 
318*f3087befSAndrew Turner   /* Cases of finite y and finite negative x.  */
319*f3087befSAndrew Turner   ret = svsel (yint_or_xpos, ret, sv_f32 (__builtin_nanf ("")));
320*f3087befSAndrew Turner 
321*f3087befSAndrew Turner   if (unlikely (svptest_any (cmp, cmp)))
322*f3087befSAndrew Turner     return sv_call_powf_sc (x, y, ret);
323*f3087befSAndrew Turner 
324*f3087befSAndrew Turner   return ret;
325*f3087befSAndrew Turner }
326*f3087befSAndrew Turner 
327*f3087befSAndrew Turner TEST_SIG (SV, F, 2, pow)
328*f3087befSAndrew Turner TEST_ULP (SV_NAME_F2 (pow), 2.08)
329*f3087befSAndrew Turner TEST_DISABLE_FENV (SV_NAME_F2 (pow))
330*f3087befSAndrew Turner /* Wide intervals spanning the whole domain but shared between x and y.  */
331*f3087befSAndrew Turner #define SV_POWF_INTERVAL2(xlo, xhi, ylo, yhi, n)                              \
332*f3087befSAndrew Turner   TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, ylo, yhi, n)                    \
333*f3087befSAndrew Turner   TEST_INTERVAL2 (SV_NAME_F2 (pow), xlo, xhi, -ylo, -yhi, n)                  \
334*f3087befSAndrew Turner   TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, ylo, yhi, n)                  \
335*f3087befSAndrew Turner   TEST_INTERVAL2 (SV_NAME_F2 (pow), -xlo, -xhi, -ylo, -yhi, n)
336*f3087befSAndrew Turner SV_POWF_INTERVAL2 (0, 0x1p-126, 0, inf, 40000)
337*f3087befSAndrew Turner SV_POWF_INTERVAL2 (0x1p-126, 1, 0, inf, 50000)
338*f3087befSAndrew Turner SV_POWF_INTERVAL2 (1, inf, 0, inf, 50000)
339*f3087befSAndrew Turner /* x~1 or y~1.  */
340*f3087befSAndrew Turner SV_POWF_INTERVAL2 (0x1p-1, 0x1p1, 0x1p-10, 0x1p10, 10000)
341*f3087befSAndrew Turner SV_POWF_INTERVAL2 (0x1.ep-1, 0x1.1p0, 0x1p8, 0x1p16, 10000)
342*f3087befSAndrew Turner SV_POWF_INTERVAL2 (0x1p-500, 0x1p500, 0x1p-1, 0x1p1, 10000)
343*f3087befSAndrew Turner /* around estimated argmaxs of ULP error.  */
344*f3087befSAndrew Turner SV_POWF_INTERVAL2 (0x1p-300, 0x1p-200, 0x1p-20, 0x1p-10, 10000)
345*f3087befSAndrew Turner SV_POWF_INTERVAL2 (0x1p50, 0x1p100, 0x1p-20, 0x1p-10, 10000)
346*f3087befSAndrew Turner /* x is negative, y is odd or even integer, or y is real not integer.  */
347*f3087befSAndrew Turner TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 3.0, 3.0, 10000)
348*f3087befSAndrew Turner TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 4.0, 4.0, 10000)
349*f3087befSAndrew Turner TEST_INTERVAL2 (SV_NAME_F2 (pow), -0.0, -10.0, 0.0, 10.0, 10000)
350*f3087befSAndrew Turner TEST_INTERVAL2 (SV_NAME_F2 (pow), 0.0, 10.0, -0.0, -10.0, 10000)
351*f3087befSAndrew Turner /* |x| is inf, y is odd or even integer, or y is real not integer.  */
352*f3087befSAndrew Turner SV_POWF_INTERVAL2 (inf, inf, 0.5, 0.5, 1)
353*f3087befSAndrew Turner SV_POWF_INTERVAL2 (inf, inf, 1.0, 1.0, 1)
354*f3087befSAndrew Turner SV_POWF_INTERVAL2 (inf, inf, 2.0, 2.0, 1)
355*f3087befSAndrew Turner SV_POWF_INTERVAL2 (inf, inf, 3.0, 3.0, 1)
356*f3087befSAndrew Turner /* 0.0^y.  */
357*f3087befSAndrew Turner SV_POWF_INTERVAL2 (0.0, 0.0, 0.0, 0x1p120, 1000)
358*f3087befSAndrew Turner /* 1.0^y.  */
359*f3087befSAndrew Turner TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0.0, 0x1p-50, 1000)
360*f3087befSAndrew Turner TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 0x1p-50, 1.0, 1000)
361*f3087befSAndrew Turner TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, 1.0, 0x1p100, 1000)
362*f3087befSAndrew Turner TEST_INTERVAL2 (SV_NAME_F2 (pow), 1.0, 1.0, -1.0, -0x1p120, 1000)
363*f3087befSAndrew Turner CLOSE_SVE_ATTR
364