1*f3087befSAndrew Turner /*
2*f3087befSAndrew Turner * Double-precision SVE powi(x, n) function.
3*f3087befSAndrew Turner *
4*f3087befSAndrew Turner * Copyright (c) 2020-2024, Arm Limited.
5*f3087befSAndrew Turner * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
6*f3087befSAndrew Turner */
7*f3087befSAndrew Turner
8*f3087befSAndrew Turner #include "sv_math.h"
9*f3087befSAndrew Turner
10*f3087befSAndrew Turner /* Optimized double-precision vector powi (double base, long integer power).
11*f3087befSAndrew Turner powi is developed for environments in which accuracy is of much less
12*f3087befSAndrew Turner importance than performance, hence we provide no estimate for worst-case
13*f3087befSAndrew Turner error. */
14*f3087befSAndrew Turner svfloat64_t
_ZGVsMxvv_powk(svfloat64_t as,svint64_t ns,svbool_t p)15*f3087befSAndrew Turner _ZGVsMxvv_powk (svfloat64_t as, svint64_t ns, svbool_t p)
16*f3087befSAndrew Turner {
17*f3087befSAndrew Turner /* Compute powi by successive squaring, right to left. */
18*f3087befSAndrew Turner svfloat64_t acc = sv_f64 (1.0);
19*f3087befSAndrew Turner svbool_t want_recip = svcmplt (p, ns, 0);
20*f3087befSAndrew Turner svuint64_t ns_abs = svreinterpret_u64 (svabs_x (p, ns));
21*f3087befSAndrew Turner
22*f3087befSAndrew Turner /* We use a max to avoid needing to check whether any lane != 0 on each
23*f3087befSAndrew Turner iteration. */
24*f3087befSAndrew Turner uint64_t max_n = svmaxv (p, ns_abs);
25*f3087befSAndrew Turner
26*f3087befSAndrew Turner svfloat64_t c = as;
27*f3087befSAndrew Turner /* Successively square c, and use merging predication (_m) to determine
28*f3087befSAndrew Turner whether or not to perform the multiplication or keep the previous
29*f3087befSAndrew Turner iteration. */
30*f3087befSAndrew Turner while (true)
31*f3087befSAndrew Turner {
32*f3087befSAndrew Turner svbool_t px = svcmpeq (p, svand_x (p, ns_abs, 1ull), 1ull);
33*f3087befSAndrew Turner acc = svmul_m (px, acc, c);
34*f3087befSAndrew Turner max_n >>= 1;
35*f3087befSAndrew Turner if (max_n == 0)
36*f3087befSAndrew Turner break;
37*f3087befSAndrew Turner
38*f3087befSAndrew Turner ns_abs = svlsr_x (p, ns_abs, 1);
39*f3087befSAndrew Turner c = svmul_x (p, c, c);
40*f3087befSAndrew Turner }
41*f3087befSAndrew Turner
42*f3087befSAndrew Turner /* Negative powers are handled by computing the abs(n) version and then
43*f3087befSAndrew Turner taking the reciprocal. */
44*f3087befSAndrew Turner if (svptest_any (want_recip, want_recip))
45*f3087befSAndrew Turner acc = svdivr_m (want_recip, acc, 1.0);
46*f3087befSAndrew Turner
47*f3087befSAndrew Turner return acc;
48*f3087befSAndrew Turner }
49*f3087befSAndrew Turner CLOSE_SVE_ATTR
50