// clang-format off
/*
 * Function wrappers for ulp.
 *
 * Copyright (c) 2022-2023, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

#define _GNU_SOURCE
#include <stdbool.h>
#include <arm_neon.h>

#if USE_MPFR
/* MPFR references for the sin and cos components of sincos: evaluate both,
   return the requested component and its ternary value.  */
static int sincos_mpfr_sin(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
  mpfr_cos(y, x, r);
  return mpfr_sin(y, x, r);
}
static int sincos_mpfr_cos(mpfr_t y, const mpfr_t x, mpfr_rnd_t r) {
  mpfr_sin(y, x, r);
  return mpfr_cos(y, x, r);
}
/* powi/powk take an integer exponent: truncate y towards zero before calling
   mpfr_pow.  */
static int wrap_mpfr_powi(mpfr_t ret, const mpfr_t x, const mpfr_t y, mpfr_rnd_t rnd) {
  mpfr_t y2;
  mpfr_init(y2);
  mpfr_trunc(y2, y);
  int t = mpfr_pow(ret, x, y2, rnd);
  mpfr_clear(y2);
  return t;
}
#endif

/* Our implementations of powi/powk are too imprecise to verify
   against any established pow implementation. Instead we have the
   following simple implementation, against which it is enough to
   maintain bitwise reproducibility. Note the test framework expects
   the reference impl to be of higher precision than the function
   under test. For instance, this means that the reference for
   double-precision powi will be passed a long double, so to check
   bitwise reproducibility we have to cast it back down to
   double. This is fine since a round-trip to higher precision and
   back down is correctly rounded. */
#define DECL_POW_INT_REF(NAME, DBL_T, FLT_T, INT_T)                    \
  static DBL_T __attribute__((unused)) NAME (DBL_T in_val, DBL_T y)    \
  {                                                                    \
    INT_T n = (INT_T) round (y);                                       \
    FLT_T acc = 1.0;                                                   \
    bool want_recip = n < 0;                                           \
    n = n < 0 ? -n : n;                                                \
                                                                       \
    for (FLT_T c = in_val; n; c *= c, n >>= 1)                         \
      {                                                                \
        if (n & 0x1)                                                   \
          {                                                            \
            acc *= c;                                                  \
          }                                                            \
      }                                                                \
    if (want_recip)                                                    \
      {                                                                \
        acc = 1.0 / acc;                                               \
      }                                                                \
    return acc;                                                        \
  }

DECL_POW_INT_REF(ref_powif, double, float, int)
DECL_POW_INT_REF(ref_powi, long double, double, int)
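
/* Worked example: ref_powif (x, 13.0) rounds the exponent to n = 13 and
   accumulates x * x^4 * x^8 == x^13 by square-and-multiply, computing in
   float and widening the result back to double for the bitwise comparison.  */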

#define ZVF1_WRAP(func) static float Z_##func##f(float x) { return _ZGVnN4v_##func##f(argf(x))[0]; }
#define ZVF2_WRAP(func) static float Z_##func##f(float x, float y) { return _ZGVnN4vv_##func##f(argf(x), argf(y))[0]; }
#define ZVD1_WRAP(func) static double Z_##func(double x) { return _ZGVnN2v_##func(argd(x))[0]; }
#define ZVD2_WRAP(func) static double Z_##func(double x, double y) { return _ZGVnN2vv_##func(argd(x), argd(y))[0]; }
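
/* For illustration: given a (hypothetical) routine "sin", ZVF1_WRAP(sin)
   would expand to a scalar shim that wraps the argument via the argf helper,
   calls the AdvSIMD variant and extracts lane 0:
     static float Z_sinf(float x) { return _ZGVnN4v_sinf(argf(x))[0]; }  */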

#if defined(__vpcs) && __aarch64__

#define ZVNF1_WRAP(func) ZVF1_WRAP(func)
#define ZVNF2_WRAP(func) ZVF2_WRAP(func)
#define ZVND1_WRAP(func) ZVD1_WRAP(func)
#define ZVND2_WRAP(func) ZVD2_WRAP(func)

#else

#define ZVNF1_WRAP(func)
#define ZVNF2_WRAP(func)
#define ZVND1_WRAP(func)
#define ZVND2_WRAP(func)

#endif

#define ZSVF1_WRAP(func) static float Z_sv_##func##f(float x) { return svretf(_ZGVsMxv_##func##f(svargf(x), svptrue_b32())); }
#define ZSVF2_WRAP(func) static float Z_sv_##func##f(float x, float y) { return svretf(_ZGVsMxvv_##func##f(svargf(x), svargf(y), svptrue_b32())); }
#define ZSVD1_WRAP(func) static double Z_sv_##func(double x) { return svretd(_ZGVsMxv_##func(svargd(x), svptrue_b64())); }
#define ZSVD2_WRAP(func) static double Z_sv_##func(double x, double y) { return svretd(_ZGVsMxvv_##func(svargd(x), svargd(y), svptrue_b64())); }
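
/* Likewise for SVE: ZSVF1_WRAP(sin) (again for a hypothetical "sin") would
   expand to
     static float Z_sv_sinf(float x) { return svretf(_ZGVsMxv_sinf(svargf(x), svptrue_b32())); }
   i.e. call the masked SVE variant with an all-true predicate and convert the
   result back to a scalar via the svretf helper.  */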

#if WANT_SVE_MATH

#define ZSVNF1_WRAP(func) ZSVF1_WRAP(func)
#define ZSVNF2_WRAP(func) ZSVF2_WRAP(func)
#define ZSVND1_WRAP(func) ZSVD1_WRAP(func)
#define ZSVND2_WRAP(func) ZSVD2_WRAP(func)

#else

#define ZSVNF1_WRAP(func)
#define ZSVNF2_WRAP(func)
#define ZSVND1_WRAP(func)
#define ZSVND2_WRAP(func)

#endif

/* No wrappers for scalar routines, but PL_SIG will emit them. */
#define ZSNF1_WRAP(func)
#define ZSNF2_WRAP(func)
#define ZSND1_WRAP(func)
#define ZSND2_WRAP(func)

#include "ulp_wrappers_gen.h"
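
/* ulp_wrappers_gen.h is expected to instantiate the *_WRAP macros above once
   per routine listed via PL_SIG, so each vector variant gets a scalar entry
   point the ulp tool can drive.  */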

float v_sincosf_sin(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return s[0]; }
float v_sincosf_cos(float x) { float32x4_t s, c; _ZGVnN4vl4l4_sincosf(vdupq_n_f32(x), &s, &c); return c[0]; }
float v_cexpif_sin(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[0][0]; }
float v_cexpif_cos(float x) { return _ZGVnN4v_cexpif(vdupq_n_f32(x)).val[1][0]; }

double v_sincos_sin(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return s[0]; }
double v_sincos_cos(double x) { float64x2_t s, c; _ZGVnN2vl8l8_sincos(vdupq_n_f64(x), &s, &c); return c[0]; }
double v_cexpi_sin(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[0][0]; }
double v_cexpi_cos(double x) { return _ZGVnN2v_cexpi(vdupq_n_f64(x)).val[1][0]; }
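
/* The sincos/cexpi wrappers above broadcast the input across a vector, call
   the AdvSIMD routine once, and return lane 0 of either the sin or the cos
   output so each component can be checked separately.  */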

#if WANT_SVE_MATH
static float Z_sv_powi(float x, float y) { return svretf(_ZGVsMxvv_powi(svargf(x), svdup_s32((int)round(y)), svptrue_b32())); }
static double Z_sv_powk(double x, double y) { return svretd(_ZGVsMxvv_powk(svargd(x), svdup_s64((long)round(y)), svptrue_b64())); }
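
/* powi/powk take an integer exponent, so the double argument supplied by the
   test harness is rounded and broadcast as an integer vector before the call.  */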

float sv_sincosf_sin(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return s[0]; }
float sv_sincosf_cos(float x) { float s[svcntw()], c[svcntw()]; _ZGVsMxvl4l4_sincosf(svdup_f32(x), s, c, svptrue_b32()); return c[0]; }
float sv_cexpif_sin(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 0)); }
float sv_cexpif_cos(float x) { return svretf(svget2(_ZGVsMxv_cexpif(svdup_f32(x), svptrue_b32()), 1)); }

double sv_sincos_sin(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return s[0]; }
double sv_sincos_cos(double x) { double s[svcntd()], c[svcntd()]; _ZGVsMxvl8l8_sincos(svdup_f64(x), s, c, svptrue_b64()); return c[0]; }
double sv_cexpi_sin(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 0)); }
double sv_cexpi_cos(double x) { return svretd(svget2(_ZGVsMxv_cexpi(svdup_f64(x), svptrue_b64()), 1)); }

#endif
// clang-format on