xref: /freebsd/contrib/arm-optimized-routines/math/aarch64/sve/sv_math.h (revision f3087bef11543b42e0d69b708f367097a4118d24)
/*
 * Wrapper functions for SVE ACLE.
 *
 * Copyright (c) 2019-2024, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */
7*f3087befSAndrew Turner 
8*f3087befSAndrew Turner #ifndef SV_MATH_H
9*f3087befSAndrew Turner #define SV_MATH_H
10*f3087befSAndrew Turner 
11*f3087befSAndrew Turner /* Enable SVE in this translation unit. Note, because this is 'pushed' in
12*f3087befSAndrew Turner    clang, any file including sv_math.h will have to pop it back off again by
13*f3087befSAndrew Turner    ending the source file with CLOSE_SVE_ATTR. It is important that sv_math.h
14*f3087befSAndrew Turner    is included first so that all functions have the target attribute.  */
15*f3087befSAndrew Turner #ifdef __clang__
16*f3087befSAndrew Turner # pragma clang attribute push(__attribute__((target("sve"))),                \
17*f3087befSAndrew Turner 			       apply_to = any(function))
18*f3087befSAndrew Turner # define CLOSE_SVE_ATTR _Pragma("clang attribute pop")
19*f3087befSAndrew Turner #else
20*f3087befSAndrew Turner # pragma GCC target("+sve")
21*f3087befSAndrew Turner # define CLOSE_SVE_ATTR
22*f3087befSAndrew Turner #endif
23*f3087befSAndrew Turner 
24*f3087befSAndrew Turner #include <arm_sve.h>
25*f3087befSAndrew Turner #include <stdbool.h>
26*f3087befSAndrew Turner 
27*f3087befSAndrew Turner #include "math_config.h"
28*f3087befSAndrew Turner 
/* Build the symbol name of an SVE vector routine from the scalar function
   name FUN.  The prefixes follow the AArch64 vector function ABI mangling
   (_ZGV <isa> <mask> <len> <params> _ <name>): 's' selects SVE, 'M' a masked
   (predicated) variant, 'x' a scalable vector length, each 'v' a vector
   argument and each 'l<stride>' a linear (pointer) argument with the given
   byte stride.

   The F variants append the 'f' suffix of the single-precision scalar name;
   the D variants use FUN unchanged.  The digit is the number of vector
   arguments and _L<n> the number of linear arguments.  */
#define SV_NAME_F1(fun) _ZGVsMxv_##fun##f
#define SV_NAME_D1(fun) _ZGVsMxv_##fun
#define SV_NAME_F2(fun) _ZGVsMxvv_##fun##f
#define SV_NAME_D2(fun) _ZGVsMxvv_##fun
#define SV_NAME_F1_L1(fun) _ZGVsMxvl4_##fun##f
#define SV_NAME_D1_L1(fun) _ZGVsMxvl8_##fun
#define SV_NAME_F1_L2(fun) _ZGVsMxvl4l4_##fun##f
36*f3087befSAndrew Turner 
/* Double precision.  */
/* Return a vector in which every signed 64-bit lane holds X.  */
static inline svint64_t
sv_s64 (int64_t x)
{
  return svdup_s64 (x);
}
43*f3087befSAndrew Turner 
/* Return a vector in which every unsigned 64-bit lane holds X.  */
static inline svuint64_t
sv_u64 (uint64_t x)
{
  return svdup_u64 (x);
}
49*f3087befSAndrew Turner 
/* Return a vector in which every double-precision lane holds X.  */
static inline svfloat64_t
sv_f64 (double x)
{
  return svdup_f64 (x);
}
55*f3087befSAndrew Turner 
/* Apply the scalar function F lane by lane.  For every lane that is active
   in CMP, F is called on that lane of X and its result replaces the same
   lane of Y.  Lanes that are inactive in CMP keep their incoming value from
   Y.  Returns the updated Y.  */
static inline svfloat64_t
sv_call_f64 (double (*f) (double), svfloat64_t x, svfloat64_t y, svbool_t cmp)
{
  /* P selects one active lane of CMP at a time, starting with the first.  */
  svbool_t p = svpfirst (cmp, svpfalse ());
  while (svptest_any (cmp, p))
    {
      /* Only one lane is set in P, so the last active element of X under P
	 is exactly that lane.  */
      double elem = svclastb (p, 0, x);
      elem = (*f) (elem);
      svfloat64_t y2 = sv_f64 (elem);
      /* Insert the scalar result into the selected lane only.  */
      y = svsel (p, y2, y);
      /* Advance to the next active 64-bit lane of CMP; P becomes empty once
	 the last one has been handled, which ends the loop.  */
      p = svpnext_b64 (cmp, p);
    }
  return y;
}
70*f3087befSAndrew Turner 
/* Two-argument counterpart of sv_call_f64.  For every lane that is active
   in CMP, the scalar function F is called on that lane of X1 and X2 and its
   result replaces the same lane of Y.  Lanes that are inactive in CMP keep
   their incoming value from Y.  Returns the updated Y.  */
static inline svfloat64_t
sv_call2_f64 (double (*f) (double, double), svfloat64_t x1, svfloat64_t x2,
	      svfloat64_t y, svbool_t cmp)
{
  /* P selects one active lane of CMP at a time, starting with the first.  */
  svbool_t p = svpfirst (cmp, svpfalse ());
  while (svptest_any (cmp, p))
    {
      /* Only one lane is set in P, so these extract that lane of each
	 input.  */
      double elem1 = svclastb (p, 0, x1);
      double elem2 = svclastb (p, 0, x2);
      double ret = (*f) (elem1, elem2);
      svfloat64_t y2 = sv_f64 (ret);
      /* Insert the scalar result into the selected lane only.  */
      y = svsel (p, y2, y);
      /* Advance to the next active 64-bit lane of CMP; P becomes empty once
	 the last one has been handled, which ends the loop.  */
      p = svpnext_b64 (cmp, p);
    }
  return y;
}
87*f3087befSAndrew Turner 
/* Lane-wise unsigned remainder of X by the scalar Y under predicate PG,
   computed as X - (X / Y) * Y.  The "_x" predication forms are used, so
   lanes that are inactive in PG hold unspecified values.  */
static inline svuint64_t
sv_mod_n_u64_x (svbool_t pg, svuint64_t x, uint64_t y)
{
  svuint64_t q = svdiv_x (pg, x, y);
  /* svmls computes X - Q * Y, i.e. what is left after removing the whole
     multiples of Y.  */
  return svmls_x (pg, x, q, y);
}
94*f3087befSAndrew Turner 
/* Single precision.  */
/* Return a vector in which every signed 32-bit lane holds X.  */
static inline svint32_t
sv_s32 (int32_t x)
{
  return svdup_s32 (x);
}
101*f3087befSAndrew Turner 
/* Return a vector in which every unsigned 32-bit lane holds X.  */
static inline svuint32_t
sv_u32 (uint32_t x)
{
  return svdup_u32 (x);
}
107*f3087befSAndrew Turner 
/* Return a vector in which every single-precision lane holds X.  */
static inline svfloat32_t
sv_f32 (float x)
{
  return svdup_f32 (x);
}
113*f3087befSAndrew Turner 
/* Apply the scalar function F lane by lane.  For every lane that is active
   in CMP, F is called on that lane of X and its result replaces the same
   lane of Y.  Lanes that are inactive in CMP keep their incoming value from
   Y.  Returns the updated Y.  */
static inline svfloat32_t
sv_call_f32 (float (*f) (float), svfloat32_t x, svfloat32_t y, svbool_t cmp)
{
  /* P selects one active lane of CMP at a time, starting with the first.  */
  svbool_t p = svpfirst (cmp, svpfalse ());
  while (svptest_any (cmp, p))
    {
      /* Only one lane is set in P, so the last active element of X under P
	 is exactly that lane.  */
      float elem = svclastb (p, 0, x);
      elem = (*f) (elem);
      svfloat32_t y2 = sv_f32 (elem);
      /* Insert the scalar result into the selected lane only.  */
      y = svsel (p, y2, y);
      /* Advance to the next active 32-bit lane of CMP; P becomes empty once
	 the last one has been handled, which ends the loop.  */
      p = svpnext_b32 (cmp, p);
    }
  return y;
}
128*f3087befSAndrew Turner 
/* Two-argument counterpart of sv_call_f32.  For every lane that is active
   in CMP, the scalar function F is called on that lane of X1 and X2 and its
   result replaces the same lane of Y.  Lanes that are inactive in CMP keep
   their incoming value from Y.  Returns the updated Y.  */
static inline svfloat32_t
sv_call2_f32 (float (*f) (float, float), svfloat32_t x1, svfloat32_t x2,
	      svfloat32_t y, svbool_t cmp)
{
  /* P selects one active lane of CMP at a time, starting with the first.  */
  svbool_t p = svpfirst (cmp, svpfalse ());
  while (svptest_any (cmp, p))
    {
      /* Only one lane is set in P, so these extract that lane of each
	 input.  */
      float elem1 = svclastb (p, 0, x1);
      float elem2 = svclastb (p, 0, x2);
      float ret = (*f) (elem1, elem2);
      svfloat32_t y2 = sv_f32 (ret);
      /* Insert the scalar result into the selected lane only.  */
      y = svsel (p, y2, y);
      /* Advance to the next active 32-bit lane of CMP; P becomes empty once
	 the last one has been handled, which ends the loop.  */
      p = svpnext_b32 (cmp, p);
    }
  return y;
}
145*f3087befSAndrew Turner #endif
146