xref: /freebsd/contrib/arm-optimized-routines/pl/math/sv_powi.c (revision ba3c1f5972d7b90feb6e6da47905ff2757e0fe57)
/*
 * Double-precision SVE powi(x, n) function.
 *
 * Copyright (c) 2020-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#include "sv_math.h"
#if SV_SUPPORTED

/* Optimized double-precision vector powi (double base, long integer power).
   powi is developed for environments in which accuracy is of much less
   importance than performance, hence we provide no estimate for worst-case
   error.  */
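
/* Reference semantics, per active lane (a minimal scalar sketch of the same
   successive-squaring scheme; not part of the original file):

     double powi_ref (double x, int64_t n)
     {
       uint64_t m = n < 0 ? -(uint64_t) n : (uint64_t) n;
       double acc = 1.0;
       for (double c = x; m != 0; m >>= 1, c = c * c)
	 if (m & 1)
	   acc *= c;
       return n < 0 ? 1.0 / acc : acc;
     }
 */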
svfloat64_t
__sv_powi_x (svfloat64_t as, svint64_t ns, svbool_t p)
{
  /* Compute powi by successive squaring, consuming the exponent bits from
     right to left.  */
  svfloat64_t acc = svdup_n_f64 (1.0);
  /* Lanes with a negative exponent take a reciprocal at the end.  */
  svbool_t want_recip = svcmplt_n_s64 (p, ns, 0);
  /* Work on |n|; the sign is reapplied via want_recip.  */
  svuint64_t ns_abs = svreinterpret_u64_s64 (svabs_s64_x (p, ns));

  /* Take the maximum |n| across the active lanes so the loop bound is known
     up front, avoiding a per-iteration check for whether any lane still has
     a nonzero exponent.  */
  uint64_t max_n = svmaxv_u64 (p, ns_abs);

  svfloat64_t c = as;
  /* Successively square c, and use merging predication (_m) to decide, per
     lane, whether to perform the multiplication or to keep the value from
     the previous iteration.  */
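  /* Worked example for a single lane: with n = 13 = 0b1101, c takes the
     values x, x^2, x^4, x^8 across iterations, and acc multiplies in c
     exactly when the low bit of the remaining exponent is set, so acc
     accumulates x^1 * x^4 * x^8 = x^13.  */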
  while (true)
    {
      /* Multiply in the current power of the base only in lanes whose
	 remaining exponent has its low bit set; the multiply precedes the
	 termination check, so the highest set bit is always consumed.  */
      svbool_t px = svcmpeq_n_u64 (p, svand_n_u64_x (p, ns_abs, 1ull), 1ull);
      acc = svmul_f64_m (px, acc, c);
      max_n >>= 1;
      if (max_n == 0)
	break;

      ns_abs = svlsr_n_u64_x (p, ns_abs, 1);
      c = svmul_f64_x (p, c, c);
    }

  /* Negative powers are handled by computing the abs(n) version and then
     taking the reciprocal: svdivr performs a reversed divide, so this sets
     acc = 1.0 / acc in exactly the lanes that requested a negative power,
     leaving the others untouched.  */
  if (svptest_any (want_recip, want_recip))
    acc = svdivr_n_f64_m (want_recip, acc, 1.0);

  return acc;
}

/* Also export the routine under its AArch64 vector function ABI name (SVE,
   masked, variable-length, two vector arguments).  */
strong_alias (__sv_powi_x, _ZGVsMxvv_powk)
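
/* Usage sketch (hypothetical caller, assuming an SVE-enabled build; not part
   of the original file):

     svbool_t pg = svptrue_b64 ();
     svfloat64_t x = svdup_n_f64 (1.5);
     svint64_t n = svdup_n_s64 (-3);
     svfloat64_t y = __sv_powi_x (x, n, pg);  // each lane holds 1.5^-3
 */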

#endif // SV_SUPPORTED