xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/experimental/sve/powi.c (revision f3087bef11543b42e0d69b708f367097a4118d24)
1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner  * Double-precision SVE powi(x, n) function.
3*f3087befSAndrew Turner  *
4*f3087befSAndrew Turner  * Copyright (c) 2020-2024, Arm Limited.
5*f3087befSAndrew Turner  * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner  */
7*f3087befSAndrew Turner 
8*f3087befSAndrew Turner #include "sv_math.h"
9*f3087befSAndrew Turner 
10*f3087befSAndrew Turner /* Optimized double-precision vector powi (double base, long integer power).
11*f3087befSAndrew Turner    powi is developed for environments in which accuracy is of much less
12*f3087befSAndrew Turner    importance than performance, hence we provide no estimate for worst-case
13*f3087befSAndrew Turner    error.  */
14*f3087befSAndrew Turner svfloat64_t
_ZGVsMxvv_powk(svfloat64_t as,svint64_t ns,svbool_t p)15*f3087befSAndrew Turner _ZGVsMxvv_powk (svfloat64_t as, svint64_t ns, svbool_t p)
16*f3087befSAndrew Turner {
17*f3087befSAndrew Turner   /* Compute powi by successive squaring, right to left.  */
18*f3087befSAndrew Turner   svfloat64_t acc = sv_f64 (1.0);
19*f3087befSAndrew Turner   svbool_t want_recip = svcmplt (p, ns, 0);
20*f3087befSAndrew Turner   svuint64_t ns_abs = svreinterpret_u64 (svabs_x (p, ns));
21*f3087befSAndrew Turner 
22*f3087befSAndrew Turner   /* We use a max to avoid needing to check whether any lane != 0 on each
23*f3087befSAndrew Turner      iteration.  */
24*f3087befSAndrew Turner   uint64_t max_n = svmaxv (p, ns_abs);
25*f3087befSAndrew Turner 
26*f3087befSAndrew Turner   svfloat64_t c = as;
27*f3087befSAndrew Turner   /* Successively square c, and use merging predication (_m) to determine
28*f3087befSAndrew Turner      whether or not to perform the multiplication or keep the previous
29*f3087befSAndrew Turner      iteration.  */
30*f3087befSAndrew Turner   while (true)
31*f3087befSAndrew Turner     {
32*f3087befSAndrew Turner       svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1ull), 1ull);
33*f3087befSAndrew Turner       acc = svmul_m (px, acc, c);
34*f3087befSAndrew Turner       max_n >>= 1;
35*f3087befSAndrew Turner       if (max_n == 0)
36*f3087befSAndrew Turner 	break;
37*f3087befSAndrew Turner 
38*f3087befSAndrew Turner       ns_abs = svlsr_x (p, ns_abs, 1);
39*f3087befSAndrew Turner       c = svmul_x (p, c, c);
40*f3087befSAndrew Turner     }
41*f3087befSAndrew Turner 
42*f3087befSAndrew Turner   /* Negative powers are handled by computing the abs(n) version and then
43*f3087befSAndrew Turner      taking the reciprocal.  */
44*f3087befSAndrew Turner   if (svptest_any (want_recip, want_recip))
45*f3087befSAndrew Turner     acc = svdivr_m (want_recip, acc, 1.0);
46*f3087befSAndrew Turner 
47*f3087befSAndrew Turner   return acc;
48*f3087befSAndrew Turner }
49*f3087befSAndrew Turner CLOSE_SVE_ATTR
50