xref: /freebsd/contrib/llvm-project/clang/lib/Headers/__clang_hip_math.h (revision 5f757f3ff9144b609b3c433dfd370cc6bdc191ad)
1e8d8bef9SDimitry Andric /*===---- __clang_hip_math.h - Device-side HIP math support ----------------===
25ffd83dbSDimitry Andric  *
35ffd83dbSDimitry Andric  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
45ffd83dbSDimitry Andric  * See https://llvm.org/LICENSE.txt for license information.
55ffd83dbSDimitry Andric  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
65ffd83dbSDimitry Andric  *
75ffd83dbSDimitry Andric  *===-----------------------------------------------------------------------===
85ffd83dbSDimitry Andric  */
95ffd83dbSDimitry Andric #ifndef __CLANG_HIP_MATH_H__
105ffd83dbSDimitry Andric #define __CLANG_HIP_MATH_H__
115ffd83dbSDimitry Andric 
1269ade1e0SDimitry Andric #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__)
13e8d8bef9SDimitry Andric #error "This file is for HIP and OpenMP AMDGCN device compilation only."
14e8d8bef9SDimitry Andric #endif
15e8d8bef9SDimitry Andric 
16fe6060f1SDimitry Andric #if !defined(__HIPCC_RTC__)
175ffd83dbSDimitry Andric #include <limits.h>
185ffd83dbSDimitry Andric #include <stdint.h>
1969ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
2069ade1e0SDimitry Andric #include <omp.h>
2169ade1e0SDimitry Andric #endif
2269ade1e0SDimitry Andric #endif // !defined(__HIPCC_RTC__)
235ffd83dbSDimitry Andric 
245ffd83dbSDimitry Andric #pragma push_macro("__DEVICE__")
2569ade1e0SDimitry Andric 
2669ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
2769ade1e0SDimitry Andric #define __DEVICE__ static inline __attribute__((always_inline, nothrow))
2869ade1e0SDimitry Andric #else
29e8d8bef9SDimitry Andric #define __DEVICE__ static __device__ inline __attribute__((always_inline))
3069ade1e0SDimitry Andric #endif
315ffd83dbSDimitry Andric 
32*5f757f3fSDimitry Andric // Device library provides fast low precision and slow full-recision
33*5f757f3fSDimitry Andric // implementations for some functions. Which one gets selected depends on
34*5f757f3fSDimitry Andric // __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if
35*5f757f3fSDimitry Andric // -ffast-math or -fgpu-approx-transcendentals are in effect.
36*5f757f3fSDimitry Andric #pragma push_macro("__FAST_OR_SLOW")
37*5f757f3fSDimitry Andric #if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__)
38*5f757f3fSDimitry Andric #define __FAST_OR_SLOW(fast, slow) fast
39*5f757f3fSDimitry Andric #else
40*5f757f3fSDimitry Andric #define __FAST_OR_SLOW(fast, slow) slow
41*5f757f3fSDimitry Andric #endif
42*5f757f3fSDimitry Andric 
43e8d8bef9SDimitry Andric // A few functions return bool type starting only in C++11.
44e8d8bef9SDimitry Andric #pragma push_macro("__RETURN_TYPE")
4569ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
4669ade1e0SDimitry Andric #define __RETURN_TYPE int
4769ade1e0SDimitry Andric #else
48e8d8bef9SDimitry Andric #if defined(__cplusplus)
495ffd83dbSDimitry Andric #define __RETURN_TYPE bool
50e8d8bef9SDimitry Andric #else
51e8d8bef9SDimitry Andric #define __RETURN_TYPE int
52e8d8bef9SDimitry Andric #endif
5369ade1e0SDimitry Andric #endif // __OPENMP_AMDGCN__
54e8d8bef9SDimitry Andric 
55e8d8bef9SDimitry Andric #if defined (__cplusplus) && __cplusplus < 201103L
56e8d8bef9SDimitry Andric // emulate static_assert on type sizes
57e8d8bef9SDimitry Andric template<bool>
58e8d8bef9SDimitry Andric struct __compare_result{};
59e8d8bef9SDimitry Andric template<>
60e8d8bef9SDimitry Andric struct __compare_result<true> {
61fe6060f1SDimitry Andric   static const __device__ bool valid;
62e8d8bef9SDimitry Andric };
635ffd83dbSDimitry Andric 
645ffd83dbSDimitry Andric __DEVICE__
65e8d8bef9SDimitry Andric void __suppress_unused_warning(bool b){};
66e8d8bef9SDimitry Andric template <unsigned int S, unsigned int T>
67e8d8bef9SDimitry Andric __DEVICE__ void __static_assert_equal_size() {
68e8d8bef9SDimitry Andric   __suppress_unused_warning(__compare_result<S == T>::valid);
69e8d8bef9SDimitry Andric }
70e8d8bef9SDimitry Andric 
71e8d8bef9SDimitry Andric #define __static_assert_type_size_equal(A, B) \
72e8d8bef9SDimitry Andric   __static_assert_equal_size<A,B>()
73e8d8bef9SDimitry Andric 
74e8d8bef9SDimitry Andric #else
75e8d8bef9SDimitry Andric #define __static_assert_type_size_equal(A,B) \
76e8d8bef9SDimitry Andric   static_assert((A) == (B), "")
77e8d8bef9SDimitry Andric 
78e8d8bef9SDimitry Andric #endif
79e8d8bef9SDimitry Andric 
80e8d8bef9SDimitry Andric __DEVICE__
81bdd1243dSDimitry Andric uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) {
825ffd83dbSDimitry Andric   uint64_t __r = 0;
83bdd1243dSDimitry Andric   while (*__tagp != '\0') {
845ffd83dbSDimitry Andric     char __tmp = *__tagp;
855ffd83dbSDimitry Andric 
865ffd83dbSDimitry Andric     if (__tmp >= '0' && __tmp <= '7')
875ffd83dbSDimitry Andric       __r = (__r * 8u) + __tmp - '0';
885ffd83dbSDimitry Andric     else
895ffd83dbSDimitry Andric       return 0;
905ffd83dbSDimitry Andric 
915ffd83dbSDimitry Andric     ++__tagp;
925ffd83dbSDimitry Andric   }
935ffd83dbSDimitry Andric 
945ffd83dbSDimitry Andric   return __r;
955ffd83dbSDimitry Andric }
965ffd83dbSDimitry Andric 
975ffd83dbSDimitry Andric __DEVICE__
98bdd1243dSDimitry Andric uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) {
995ffd83dbSDimitry Andric   uint64_t __r = 0;
100bdd1243dSDimitry Andric   while (*__tagp != '\0') {
1015ffd83dbSDimitry Andric     char __tmp = *__tagp;
1025ffd83dbSDimitry Andric 
1035ffd83dbSDimitry Andric     if (__tmp >= '0' && __tmp <= '9')
1045ffd83dbSDimitry Andric       __r = (__r * 10u) + __tmp - '0';
1055ffd83dbSDimitry Andric     else
1065ffd83dbSDimitry Andric       return 0;
1075ffd83dbSDimitry Andric 
1085ffd83dbSDimitry Andric     ++__tagp;
1095ffd83dbSDimitry Andric   }
1105ffd83dbSDimitry Andric 
1115ffd83dbSDimitry Andric   return __r;
1125ffd83dbSDimitry Andric }
1135ffd83dbSDimitry Andric 
1145ffd83dbSDimitry Andric __DEVICE__
115bdd1243dSDimitry Andric uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) {
1165ffd83dbSDimitry Andric   uint64_t __r = 0;
117bdd1243dSDimitry Andric   while (*__tagp != '\0') {
1185ffd83dbSDimitry Andric     char __tmp = *__tagp;
1195ffd83dbSDimitry Andric 
1205ffd83dbSDimitry Andric     if (__tmp >= '0' && __tmp <= '9')
1215ffd83dbSDimitry Andric       __r = (__r * 16u) + __tmp - '0';
1225ffd83dbSDimitry Andric     else if (__tmp >= 'a' && __tmp <= 'f')
1235ffd83dbSDimitry Andric       __r = (__r * 16u) + __tmp - 'a' + 10;
1245ffd83dbSDimitry Andric     else if (__tmp >= 'A' && __tmp <= 'F')
1255ffd83dbSDimitry Andric       __r = (__r * 16u) + __tmp - 'A' + 10;
1265ffd83dbSDimitry Andric     else
1275ffd83dbSDimitry Andric       return 0;
1285ffd83dbSDimitry Andric 
1295ffd83dbSDimitry Andric     ++__tagp;
1305ffd83dbSDimitry Andric   }
1315ffd83dbSDimitry Andric 
1325ffd83dbSDimitry Andric   return __r;
1335ffd83dbSDimitry Andric }
1345ffd83dbSDimitry Andric 
1355ffd83dbSDimitry Andric __DEVICE__
136bdd1243dSDimitry Andric uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) {
1375ffd83dbSDimitry Andric   if (*__tagp == '0') {
1385ffd83dbSDimitry Andric     ++__tagp;
1395ffd83dbSDimitry Andric 
1405ffd83dbSDimitry Andric     if (*__tagp == 'x' || *__tagp == 'X')
1415ffd83dbSDimitry Andric       return __make_mantissa_base16(__tagp);
1425ffd83dbSDimitry Andric     else
1435ffd83dbSDimitry Andric       return __make_mantissa_base8(__tagp);
1445ffd83dbSDimitry Andric   }
1455ffd83dbSDimitry Andric 
1465ffd83dbSDimitry Andric   return __make_mantissa_base10(__tagp);
1475ffd83dbSDimitry Andric }
1485ffd83dbSDimitry Andric 
1495ffd83dbSDimitry Andric // BEGIN FLOAT
150*5f757f3fSDimitry Andric 
151*5f757f3fSDimitry Andric // BEGIN INTRINSICS
152*5f757f3fSDimitry Andric 
153*5f757f3fSDimitry Andric __DEVICE__
154*5f757f3fSDimitry Andric float __cosf(float __x) { return __ocml_native_cos_f32(__x); }
155*5f757f3fSDimitry Andric 
156*5f757f3fSDimitry Andric __DEVICE__
157*5f757f3fSDimitry Andric float __exp10f(float __x) {
158*5f757f3fSDimitry Andric   const float __log2_10 = 0x1.a934f0p+1f;
159*5f757f3fSDimitry Andric   return __builtin_amdgcn_exp2f(__log2_10 * __x);
160*5f757f3fSDimitry Andric }
161*5f757f3fSDimitry Andric 
162*5f757f3fSDimitry Andric __DEVICE__
163*5f757f3fSDimitry Andric float __expf(float __x) {
164*5f757f3fSDimitry Andric   const float __log2_e = 0x1.715476p+0;
165*5f757f3fSDimitry Andric   return __builtin_amdgcn_exp2f(__log2_e * __x);
166*5f757f3fSDimitry Andric }
167*5f757f3fSDimitry Andric 
168*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
169*5f757f3fSDimitry Andric __DEVICE__
170*5f757f3fSDimitry Andric float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); }
171*5f757f3fSDimitry Andric __DEVICE__
172*5f757f3fSDimitry Andric float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); }
173*5f757f3fSDimitry Andric __DEVICE__
174*5f757f3fSDimitry Andric float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); }
175*5f757f3fSDimitry Andric __DEVICE__
176*5f757f3fSDimitry Andric float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); }
177*5f757f3fSDimitry Andric #else
178*5f757f3fSDimitry Andric __DEVICE__
179*5f757f3fSDimitry Andric float __fadd_rn(float __x, float __y) { return __x + __y; }
180*5f757f3fSDimitry Andric #endif
181*5f757f3fSDimitry Andric 
182*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
183*5f757f3fSDimitry Andric __DEVICE__
184*5f757f3fSDimitry Andric float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); }
185*5f757f3fSDimitry Andric __DEVICE__
186*5f757f3fSDimitry Andric float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); }
187*5f757f3fSDimitry Andric __DEVICE__
188*5f757f3fSDimitry Andric float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); }
189*5f757f3fSDimitry Andric __DEVICE__
190*5f757f3fSDimitry Andric float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); }
191*5f757f3fSDimitry Andric #else
192*5f757f3fSDimitry Andric __DEVICE__
193*5f757f3fSDimitry Andric float __fdiv_rn(float __x, float __y) { return __x / __y; }
194*5f757f3fSDimitry Andric #endif
195*5f757f3fSDimitry Andric 
196*5f757f3fSDimitry Andric __DEVICE__
197*5f757f3fSDimitry Andric float __fdividef(float __x, float __y) { return __x / __y; }
198*5f757f3fSDimitry Andric 
199*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
200*5f757f3fSDimitry Andric __DEVICE__
201*5f757f3fSDimitry Andric float __fmaf_rd(float __x, float __y, float __z) {
202*5f757f3fSDimitry Andric   return __ocml_fma_rtn_f32(__x, __y, __z);
203*5f757f3fSDimitry Andric }
204*5f757f3fSDimitry Andric __DEVICE__
205*5f757f3fSDimitry Andric float __fmaf_rn(float __x, float __y, float __z) {
206*5f757f3fSDimitry Andric   return __ocml_fma_rte_f32(__x, __y, __z);
207*5f757f3fSDimitry Andric }
208*5f757f3fSDimitry Andric __DEVICE__
209*5f757f3fSDimitry Andric float __fmaf_ru(float __x, float __y, float __z) {
210*5f757f3fSDimitry Andric   return __ocml_fma_rtp_f32(__x, __y, __z);
211*5f757f3fSDimitry Andric }
212*5f757f3fSDimitry Andric __DEVICE__
213*5f757f3fSDimitry Andric float __fmaf_rz(float __x, float __y, float __z) {
214*5f757f3fSDimitry Andric   return __ocml_fma_rtz_f32(__x, __y, __z);
215*5f757f3fSDimitry Andric }
216*5f757f3fSDimitry Andric #else
217*5f757f3fSDimitry Andric __DEVICE__
218*5f757f3fSDimitry Andric float __fmaf_rn(float __x, float __y, float __z) {
219*5f757f3fSDimitry Andric   return __builtin_fmaf(__x, __y, __z);
220*5f757f3fSDimitry Andric }
221*5f757f3fSDimitry Andric #endif
222*5f757f3fSDimitry Andric 
223*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
224*5f757f3fSDimitry Andric __DEVICE__
225*5f757f3fSDimitry Andric float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); }
226*5f757f3fSDimitry Andric __DEVICE__
227*5f757f3fSDimitry Andric float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); }
228*5f757f3fSDimitry Andric __DEVICE__
229*5f757f3fSDimitry Andric float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); }
230*5f757f3fSDimitry Andric __DEVICE__
231*5f757f3fSDimitry Andric float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); }
232*5f757f3fSDimitry Andric #else
233*5f757f3fSDimitry Andric __DEVICE__
234*5f757f3fSDimitry Andric float __fmul_rn(float __x, float __y) { return __x * __y; }
235*5f757f3fSDimitry Andric #endif
236*5f757f3fSDimitry Andric 
237*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
238*5f757f3fSDimitry Andric __DEVICE__
239*5f757f3fSDimitry Andric float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); }
240*5f757f3fSDimitry Andric __DEVICE__
241*5f757f3fSDimitry Andric float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); }
242*5f757f3fSDimitry Andric __DEVICE__
243*5f757f3fSDimitry Andric float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); }
244*5f757f3fSDimitry Andric __DEVICE__
245*5f757f3fSDimitry Andric float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); }
246*5f757f3fSDimitry Andric #else
247*5f757f3fSDimitry Andric __DEVICE__
248*5f757f3fSDimitry Andric float __frcp_rn(float __x) { return 1.0f / __x; }
249*5f757f3fSDimitry Andric #endif
250*5f757f3fSDimitry Andric 
251*5f757f3fSDimitry Andric __DEVICE__
252*5f757f3fSDimitry Andric float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); }
253*5f757f3fSDimitry Andric 
254*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
255*5f757f3fSDimitry Andric __DEVICE__
256*5f757f3fSDimitry Andric float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); }
257*5f757f3fSDimitry Andric __DEVICE__
258*5f757f3fSDimitry Andric float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); }
259*5f757f3fSDimitry Andric __DEVICE__
260*5f757f3fSDimitry Andric float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); }
261*5f757f3fSDimitry Andric __DEVICE__
262*5f757f3fSDimitry Andric float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); }
263*5f757f3fSDimitry Andric #else
264*5f757f3fSDimitry Andric __DEVICE__
265*5f757f3fSDimitry Andric float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); }
266*5f757f3fSDimitry Andric #endif
267*5f757f3fSDimitry Andric 
268*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
269*5f757f3fSDimitry Andric __DEVICE__
270*5f757f3fSDimitry Andric float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); }
271*5f757f3fSDimitry Andric __DEVICE__
272*5f757f3fSDimitry Andric float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); }
273*5f757f3fSDimitry Andric __DEVICE__
274*5f757f3fSDimitry Andric float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); }
275*5f757f3fSDimitry Andric __DEVICE__
276*5f757f3fSDimitry Andric float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); }
277*5f757f3fSDimitry Andric #else
278*5f757f3fSDimitry Andric __DEVICE__
279*5f757f3fSDimitry Andric float __fsub_rn(float __x, float __y) { return __x - __y; }
280*5f757f3fSDimitry Andric #endif
281*5f757f3fSDimitry Andric 
282*5f757f3fSDimitry Andric __DEVICE__
283*5f757f3fSDimitry Andric float __log10f(float __x) { return __builtin_log10f(__x); }
284*5f757f3fSDimitry Andric 
285*5f757f3fSDimitry Andric __DEVICE__
286*5f757f3fSDimitry Andric float __log2f(float __x) { return __builtin_amdgcn_logf(__x); }
287*5f757f3fSDimitry Andric 
288*5f757f3fSDimitry Andric __DEVICE__
289*5f757f3fSDimitry Andric float __logf(float __x) { return __builtin_logf(__x); }
290*5f757f3fSDimitry Andric 
291*5f757f3fSDimitry Andric __DEVICE__
292*5f757f3fSDimitry Andric float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
293*5f757f3fSDimitry Andric 
294*5f757f3fSDimitry Andric __DEVICE__
295*5f757f3fSDimitry Andric float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); }
296*5f757f3fSDimitry Andric 
297*5f757f3fSDimitry Andric __DEVICE__
298*5f757f3fSDimitry Andric void __sincosf(float __x, float *__sinptr, float *__cosptr) {
299*5f757f3fSDimitry Andric   *__sinptr = __ocml_native_sin_f32(__x);
300*5f757f3fSDimitry Andric   *__cosptr = __ocml_native_cos_f32(__x);
301*5f757f3fSDimitry Andric }
302*5f757f3fSDimitry Andric 
303*5f757f3fSDimitry Andric __DEVICE__
304*5f757f3fSDimitry Andric float __sinf(float __x) { return __ocml_native_sin_f32(__x); }
305*5f757f3fSDimitry Andric 
306*5f757f3fSDimitry Andric __DEVICE__
307*5f757f3fSDimitry Andric float __tanf(float __x) {
308*5f757f3fSDimitry Andric   return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x));
309*5f757f3fSDimitry Andric }
310*5f757f3fSDimitry Andric // END INTRINSICS
311*5f757f3fSDimitry Andric 
312e8d8bef9SDimitry Andric #if defined(__cplusplus)
3135ffd83dbSDimitry Andric __DEVICE__
314e8d8bef9SDimitry Andric int abs(int __x) {
315*5f757f3fSDimitry Andric   return __builtin_abs(__x);
3165ffd83dbSDimitry Andric }
3175ffd83dbSDimitry Andric __DEVICE__
318e8d8bef9SDimitry Andric long labs(long __x) {
319*5f757f3fSDimitry Andric   return __builtin_labs(__x);
320e8d8bef9SDimitry Andric }
3215ffd83dbSDimitry Andric __DEVICE__
322e8d8bef9SDimitry Andric long long llabs(long long __x) {
323*5f757f3fSDimitry Andric   return __builtin_llabs(__x);
324e8d8bef9SDimitry Andric }
325e8d8bef9SDimitry Andric #endif
326e8d8bef9SDimitry Andric 
3275ffd83dbSDimitry Andric __DEVICE__
328e8d8bef9SDimitry Andric float acosf(float __x) { return __ocml_acos_f32(__x); }
329e8d8bef9SDimitry Andric 
3305ffd83dbSDimitry Andric __DEVICE__
331e8d8bef9SDimitry Andric float acoshf(float __x) { return __ocml_acosh_f32(__x); }
332e8d8bef9SDimitry Andric 
3335ffd83dbSDimitry Andric __DEVICE__
334e8d8bef9SDimitry Andric float asinf(float __x) { return __ocml_asin_f32(__x); }
335e8d8bef9SDimitry Andric 
3365ffd83dbSDimitry Andric __DEVICE__
337e8d8bef9SDimitry Andric float asinhf(float __x) { return __ocml_asinh_f32(__x); }
338e8d8bef9SDimitry Andric 
3395ffd83dbSDimitry Andric __DEVICE__
340e8d8bef9SDimitry Andric float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); }
341e8d8bef9SDimitry Andric 
3425ffd83dbSDimitry Andric __DEVICE__
343e8d8bef9SDimitry Andric float atanf(float __x) { return __ocml_atan_f32(__x); }
344e8d8bef9SDimitry Andric 
3455ffd83dbSDimitry Andric __DEVICE__
346e8d8bef9SDimitry Andric float atanhf(float __x) { return __ocml_atanh_f32(__x); }
347e8d8bef9SDimitry Andric 
3485ffd83dbSDimitry Andric __DEVICE__
349e8d8bef9SDimitry Andric float cbrtf(float __x) { return __ocml_cbrt_f32(__x); }
350e8d8bef9SDimitry Andric 
3515ffd83dbSDimitry Andric __DEVICE__
35206c3fb27SDimitry Andric float ceilf(float __x) { return __builtin_ceilf(__x); }
353e8d8bef9SDimitry Andric 
3545ffd83dbSDimitry Andric __DEVICE__
35506c3fb27SDimitry Andric float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); }
356e8d8bef9SDimitry Andric 
3575ffd83dbSDimitry Andric __DEVICE__
358*5f757f3fSDimitry Andric float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); }
359e8d8bef9SDimitry Andric 
3605ffd83dbSDimitry Andric __DEVICE__
361e8d8bef9SDimitry Andric float coshf(float __x) { return __ocml_cosh_f32(__x); }
362e8d8bef9SDimitry Andric 
3635ffd83dbSDimitry Andric __DEVICE__
364e8d8bef9SDimitry Andric float cospif(float __x) { return __ocml_cospi_f32(__x); }
365e8d8bef9SDimitry Andric 
3665ffd83dbSDimitry Andric __DEVICE__
367e8d8bef9SDimitry Andric float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); }
368e8d8bef9SDimitry Andric 
3695ffd83dbSDimitry Andric __DEVICE__
370e8d8bef9SDimitry Andric float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); }
371e8d8bef9SDimitry Andric 
3725ffd83dbSDimitry Andric __DEVICE__
373e8d8bef9SDimitry Andric float erfcf(float __x) { return __ocml_erfc_f32(__x); }
374e8d8bef9SDimitry Andric 
3755ffd83dbSDimitry Andric __DEVICE__
376e8d8bef9SDimitry Andric float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); }
377e8d8bef9SDimitry Andric 
378e8d8bef9SDimitry Andric __DEVICE__
379e8d8bef9SDimitry Andric float erfcxf(float __x) { return __ocml_erfcx_f32(__x); }
380e8d8bef9SDimitry Andric 
381e8d8bef9SDimitry Andric __DEVICE__
382e8d8bef9SDimitry Andric float erff(float __x) { return __ocml_erf_f32(__x); }
383e8d8bef9SDimitry Andric 
384e8d8bef9SDimitry Andric __DEVICE__
385e8d8bef9SDimitry Andric float erfinvf(float __x) { return __ocml_erfinv_f32(__x); }
386e8d8bef9SDimitry Andric 
387e8d8bef9SDimitry Andric __DEVICE__
388e8d8bef9SDimitry Andric float exp10f(float __x) { return __ocml_exp10_f32(__x); }
389e8d8bef9SDimitry Andric 
390e8d8bef9SDimitry Andric __DEVICE__
39106c3fb27SDimitry Andric float exp2f(float __x) { return __builtin_exp2f(__x); }
392e8d8bef9SDimitry Andric 
393e8d8bef9SDimitry Andric __DEVICE__
39406c3fb27SDimitry Andric float expf(float __x) { return __builtin_expf(__x); }
395e8d8bef9SDimitry Andric 
396e8d8bef9SDimitry Andric __DEVICE__
397e8d8bef9SDimitry Andric float expm1f(float __x) { return __ocml_expm1_f32(__x); }
398e8d8bef9SDimitry Andric 
399e8d8bef9SDimitry Andric __DEVICE__
400bdd1243dSDimitry Andric float fabsf(float __x) { return __builtin_fabsf(__x); }
401e8d8bef9SDimitry Andric 
402e8d8bef9SDimitry Andric __DEVICE__
403e8d8bef9SDimitry Andric float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); }
404e8d8bef9SDimitry Andric 
405e8d8bef9SDimitry Andric __DEVICE__
406e8d8bef9SDimitry Andric float fdividef(float __x, float __y) { return __x / __y; }
407e8d8bef9SDimitry Andric 
408e8d8bef9SDimitry Andric __DEVICE__
40906c3fb27SDimitry Andric float floorf(float __x) { return __builtin_floorf(__x); }
410e8d8bef9SDimitry Andric 
411e8d8bef9SDimitry Andric __DEVICE__
412e8d8bef9SDimitry Andric float fmaf(float __x, float __y, float __z) {
41306c3fb27SDimitry Andric   return __builtin_fmaf(__x, __y, __z);
4145ffd83dbSDimitry Andric }
415e8d8bef9SDimitry Andric 
4165ffd83dbSDimitry Andric __DEVICE__
41706c3fb27SDimitry Andric float fmaxf(float __x, float __y) { return __builtin_fmaxf(__x, __y); }
418e8d8bef9SDimitry Andric 
4195ffd83dbSDimitry Andric __DEVICE__
42006c3fb27SDimitry Andric float fminf(float __x, float __y) { return __builtin_fminf(__x, __y); }
421e8d8bef9SDimitry Andric 
4225ffd83dbSDimitry Andric __DEVICE__
423e8d8bef9SDimitry Andric float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); }
424e8d8bef9SDimitry Andric 
4255ffd83dbSDimitry Andric __DEVICE__
426e8d8bef9SDimitry Andric float frexpf(float __x, int *__nptr) {
42706c3fb27SDimitry Andric   return __builtin_frexpf(__x, __nptr);
4285ffd83dbSDimitry Andric }
429e8d8bef9SDimitry Andric 
4305ffd83dbSDimitry Andric __DEVICE__
431e8d8bef9SDimitry Andric float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); }
432e8d8bef9SDimitry Andric 
4335ffd83dbSDimitry Andric __DEVICE__
434e8d8bef9SDimitry Andric int ilogbf(float __x) { return __ocml_ilogb_f32(__x); }
435e8d8bef9SDimitry Andric 
4365ffd83dbSDimitry Andric __DEVICE__
43706c3fb27SDimitry Andric __RETURN_TYPE __finitef(float __x) { return __builtin_isfinite(__x); }
438e8d8bef9SDimitry Andric 
4395ffd83dbSDimitry Andric __DEVICE__
44006c3fb27SDimitry Andric __RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); }
441e8d8bef9SDimitry Andric 
4425ffd83dbSDimitry Andric __DEVICE__
44306c3fb27SDimitry Andric __RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); }
444e8d8bef9SDimitry Andric 
4455ffd83dbSDimitry Andric __DEVICE__
446e8d8bef9SDimitry Andric float j0f(float __x) { return __ocml_j0_f32(__x); }
447e8d8bef9SDimitry Andric 
4485ffd83dbSDimitry Andric __DEVICE__
449e8d8bef9SDimitry Andric float j1f(float __x) { return __ocml_j1_f32(__x); }
450e8d8bef9SDimitry Andric 
4515ffd83dbSDimitry Andric __DEVICE__
452e8d8bef9SDimitry Andric float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication
4535ffd83dbSDimitry Andric                                 // and the Miller & Brown algorithm
4545ffd83dbSDimitry Andric   //       for linear recurrences to get O(log n) steps, but it's unclear if
4555ffd83dbSDimitry Andric   //       it'd be beneficial in this case.
4565ffd83dbSDimitry Andric   if (__n == 0)
4575ffd83dbSDimitry Andric     return j0f(__x);
4585ffd83dbSDimitry Andric   if (__n == 1)
4595ffd83dbSDimitry Andric     return j1f(__x);
4605ffd83dbSDimitry Andric 
4615ffd83dbSDimitry Andric   float __x0 = j0f(__x);
4625ffd83dbSDimitry Andric   float __x1 = j1f(__x);
4635ffd83dbSDimitry Andric   for (int __i = 1; __i < __n; ++__i) {
4645ffd83dbSDimitry Andric     float __x2 = (2 * __i) / __x * __x1 - __x0;
4655ffd83dbSDimitry Andric     __x0 = __x1;
4665ffd83dbSDimitry Andric     __x1 = __x2;
4675ffd83dbSDimitry Andric   }
4685ffd83dbSDimitry Andric 
4695ffd83dbSDimitry Andric   return __x1;
4705ffd83dbSDimitry Andric }
471e8d8bef9SDimitry Andric 
4725ffd83dbSDimitry Andric __DEVICE__
47306c3fb27SDimitry Andric float ldexpf(float __x, int __e) { return __builtin_amdgcn_ldexpf(__x, __e); }
474e8d8bef9SDimitry Andric 
4755ffd83dbSDimitry Andric __DEVICE__
476e8d8bef9SDimitry Andric float lgammaf(float __x) { return __ocml_lgamma_f32(__x); }
477e8d8bef9SDimitry Andric 
4785ffd83dbSDimitry Andric __DEVICE__
47906c3fb27SDimitry Andric long long int llrintf(float __x) { return __builtin_rintf(__x); }
480e8d8bef9SDimitry Andric 
4815ffd83dbSDimitry Andric __DEVICE__
48206c3fb27SDimitry Andric long long int llroundf(float __x) { return __builtin_roundf(__x); }
483e8d8bef9SDimitry Andric 
4845ffd83dbSDimitry Andric __DEVICE__
48506c3fb27SDimitry Andric float log10f(float __x) { return __builtin_log10f(__x); }
486e8d8bef9SDimitry Andric 
4875ffd83dbSDimitry Andric __DEVICE__
488e8d8bef9SDimitry Andric float log1pf(float __x) { return __ocml_log1p_f32(__x); }
489e8d8bef9SDimitry Andric 
4905ffd83dbSDimitry Andric __DEVICE__
491*5f757f3fSDimitry Andric float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); }
492e8d8bef9SDimitry Andric 
4935ffd83dbSDimitry Andric __DEVICE__
494e8d8bef9SDimitry Andric float logbf(float __x) { return __ocml_logb_f32(__x); }
495e8d8bef9SDimitry Andric 
4965ffd83dbSDimitry Andric __DEVICE__
497*5f757f3fSDimitry Andric float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); }
498e8d8bef9SDimitry Andric 
4995ffd83dbSDimitry Andric __DEVICE__
50006c3fb27SDimitry Andric long int lrintf(float __x) { return __builtin_rintf(__x); }
501e8d8bef9SDimitry Andric 
5025ffd83dbSDimitry Andric __DEVICE__
50306c3fb27SDimitry Andric long int lroundf(float __x) { return __builtin_roundf(__x); }
504e8d8bef9SDimitry Andric 
5055ffd83dbSDimitry Andric __DEVICE__
506e8d8bef9SDimitry Andric float modff(float __x, float *__iptr) {
5075ffd83dbSDimitry Andric   float __tmp;
50869ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
50969ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
51069ade1e0SDimitry Andric #endif
5115ffd83dbSDimitry Andric   float __r =
5125ffd83dbSDimitry Andric       __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
5135ffd83dbSDimitry Andric   *__iptr = __tmp;
5145ffd83dbSDimitry Andric   return __r;
5155ffd83dbSDimitry Andric }
516e8d8bef9SDimitry Andric 
5175ffd83dbSDimitry Andric __DEVICE__
518bdd1243dSDimitry Andric float nanf(const char *__tagp __attribute__((nonnull))) {
5195ffd83dbSDimitry Andric   union {
5205ffd83dbSDimitry Andric     float val;
5215ffd83dbSDimitry Andric     struct ieee_float {
522e8d8bef9SDimitry Andric       unsigned int mantissa : 22;
523e8d8bef9SDimitry Andric       unsigned int quiet : 1;
524e8d8bef9SDimitry Andric       unsigned int exponent : 8;
525e8d8bef9SDimitry Andric       unsigned int sign : 1;
5265ffd83dbSDimitry Andric     } bits;
5275ffd83dbSDimitry Andric   } __tmp;
528e8d8bef9SDimitry Andric   __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
5295ffd83dbSDimitry Andric 
5305ffd83dbSDimitry Andric   __tmp.bits.sign = 0u;
5315ffd83dbSDimitry Andric   __tmp.bits.exponent = ~0u;
5325ffd83dbSDimitry Andric   __tmp.bits.quiet = 1u;
5335ffd83dbSDimitry Andric   __tmp.bits.mantissa = __make_mantissa(__tagp);
5345ffd83dbSDimitry Andric 
5355ffd83dbSDimitry Andric   return __tmp.val;
5365ffd83dbSDimitry Andric }
537e8d8bef9SDimitry Andric 
5385ffd83dbSDimitry Andric __DEVICE__
53906c3fb27SDimitry Andric float nearbyintf(float __x) { return __builtin_nearbyintf(__x); }
540e8d8bef9SDimitry Andric 
5415ffd83dbSDimitry Andric __DEVICE__
542e8d8bef9SDimitry Andric float nextafterf(float __x, float __y) {
5435ffd83dbSDimitry Andric   return __ocml_nextafter_f32(__x, __y);
5445ffd83dbSDimitry Andric }
545e8d8bef9SDimitry Andric 
5465ffd83dbSDimitry Andric __DEVICE__
547e8d8bef9SDimitry Andric float norm3df(float __x, float __y, float __z) {
5485ffd83dbSDimitry Andric   return __ocml_len3_f32(__x, __y, __z);
5495ffd83dbSDimitry Andric }
550e8d8bef9SDimitry Andric 
5515ffd83dbSDimitry Andric __DEVICE__
552e8d8bef9SDimitry Andric float norm4df(float __x, float __y, float __z, float __w) {
5535ffd83dbSDimitry Andric   return __ocml_len4_f32(__x, __y, __z, __w);
5545ffd83dbSDimitry Andric }
555e8d8bef9SDimitry Andric 
5565ffd83dbSDimitry Andric __DEVICE__
557e8d8bef9SDimitry Andric float normcdff(float __x) { return __ocml_ncdf_f32(__x); }
558e8d8bef9SDimitry Andric 
5595ffd83dbSDimitry Andric __DEVICE__
560e8d8bef9SDimitry Andric float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); }
561e8d8bef9SDimitry Andric 
5625ffd83dbSDimitry Andric __DEVICE__
563e8d8bef9SDimitry Andric float normf(int __dim,
5645ffd83dbSDimitry Andric             const float *__a) { // TODO: placeholder until OCML adds support.
5655ffd83dbSDimitry Andric   float __r = 0;
5665ffd83dbSDimitry Andric   while (__dim--) {
5675ffd83dbSDimitry Andric     __r += __a[0] * __a[0];
5685ffd83dbSDimitry Andric     ++__a;
5695ffd83dbSDimitry Andric   }
5705ffd83dbSDimitry Andric 
571*5f757f3fSDimitry Andric   return __builtin_sqrtf(__r);
5725ffd83dbSDimitry Andric }
573e8d8bef9SDimitry Andric 
5745ffd83dbSDimitry Andric __DEVICE__
575e8d8bef9SDimitry Andric float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); }
576e8d8bef9SDimitry Andric 
5775ffd83dbSDimitry Andric __DEVICE__
578e8d8bef9SDimitry Andric float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); }
579e8d8bef9SDimitry Andric 
5805ffd83dbSDimitry Andric __DEVICE__
581e8d8bef9SDimitry Andric float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); }
582e8d8bef9SDimitry Andric 
583e8d8bef9SDimitry Andric __DEVICE__
584e8d8bef9SDimitry Andric float remainderf(float __x, float __y) {
5855ffd83dbSDimitry Andric   return __ocml_remainder_f32(__x, __y);
5865ffd83dbSDimitry Andric }
587e8d8bef9SDimitry Andric 
5885ffd83dbSDimitry Andric __DEVICE__
589e8d8bef9SDimitry Andric float remquof(float __x, float __y, int *__quo) {
5905ffd83dbSDimitry Andric   int __tmp;
59169ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
59269ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
59369ade1e0SDimitry Andric #endif
5945ffd83dbSDimitry Andric   float __r = __ocml_remquo_f32(
5955ffd83dbSDimitry Andric       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
5965ffd83dbSDimitry Andric   *__quo = __tmp;
5975ffd83dbSDimitry Andric 
5985ffd83dbSDimitry Andric   return __r;
5995ffd83dbSDimitry Andric }
600e8d8bef9SDimitry Andric 
6015ffd83dbSDimitry Andric __DEVICE__
602e8d8bef9SDimitry Andric float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); }
603e8d8bef9SDimitry Andric 
6045ffd83dbSDimitry Andric __DEVICE__
60506c3fb27SDimitry Andric float rintf(float __x) { return __builtin_rintf(__x); }
606e8d8bef9SDimitry Andric 
6075ffd83dbSDimitry Andric __DEVICE__
608e8d8bef9SDimitry Andric float rnorm3df(float __x, float __y, float __z) {
6095ffd83dbSDimitry Andric   return __ocml_rlen3_f32(__x, __y, __z);
6105ffd83dbSDimitry Andric }
6115ffd83dbSDimitry Andric 
6125ffd83dbSDimitry Andric __DEVICE__
613e8d8bef9SDimitry Andric float rnorm4df(float __x, float __y, float __z, float __w) {
6145ffd83dbSDimitry Andric   return __ocml_rlen4_f32(__x, __y, __z, __w);
6155ffd83dbSDimitry Andric }
616e8d8bef9SDimitry Andric 
6175ffd83dbSDimitry Andric __DEVICE__
618e8d8bef9SDimitry Andric float rnormf(int __dim,
6195ffd83dbSDimitry Andric              const float *__a) { // TODO: placeholder until OCML adds support.
6205ffd83dbSDimitry Andric   float __r = 0;
6215ffd83dbSDimitry Andric   while (__dim--) {
6225ffd83dbSDimitry Andric     __r += __a[0] * __a[0];
6235ffd83dbSDimitry Andric     ++__a;
6245ffd83dbSDimitry Andric   }
6255ffd83dbSDimitry Andric 
6265ffd83dbSDimitry Andric   return __ocml_rsqrt_f32(__r);
6275ffd83dbSDimitry Andric }
628e8d8bef9SDimitry Andric 
6295ffd83dbSDimitry Andric __DEVICE__
63006c3fb27SDimitry Andric float roundf(float __x) { return __builtin_roundf(__x); }
631e8d8bef9SDimitry Andric 
6325ffd83dbSDimitry Andric __DEVICE__
633e8d8bef9SDimitry Andric float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); }
634e8d8bef9SDimitry Andric 
6355ffd83dbSDimitry Andric __DEVICE__
636e8d8bef9SDimitry Andric float scalblnf(float __x, long int __n) {
63706c3fb27SDimitry Andric   return (__n < INT_MAX) ? __builtin_amdgcn_ldexpf(__x, __n)
6385ffd83dbSDimitry Andric                          : __ocml_scalb_f32(__x, __n);
6395ffd83dbSDimitry Andric }
6405ffd83dbSDimitry Andric 
641e8d8bef9SDimitry Andric __DEVICE__
64206c3fb27SDimitry Andric float scalbnf(float __x, int __n) { return __builtin_amdgcn_ldexpf(__x, __n); }
643e8d8bef9SDimitry Andric 
644e8d8bef9SDimitry Andric __DEVICE__
64506c3fb27SDimitry Andric __RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); }
646e8d8bef9SDimitry Andric 
647e8d8bef9SDimitry Andric __DEVICE__
648e8d8bef9SDimitry Andric void sincosf(float __x, float *__sinptr, float *__cosptr) {
649e8d8bef9SDimitry Andric   float __tmp;
65069ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
65169ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
65269ade1e0SDimitry Andric #endif
653*5f757f3fSDimitry Andric #ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__
654*5f757f3fSDimitry Andric   __sincosf(__x, __sinptr, __cosptr);
655*5f757f3fSDimitry Andric #else
6565ffd83dbSDimitry Andric   *__sinptr =
6575ffd83dbSDimitry Andric       __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp);
6585ffd83dbSDimitry Andric   *__cosptr = __tmp;
659*5f757f3fSDimitry Andric #endif
6605ffd83dbSDimitry Andric }
6615ffd83dbSDimitry Andric 
662e8d8bef9SDimitry Andric __DEVICE__
663e8d8bef9SDimitry Andric void sincospif(float __x, float *__sinptr, float *__cosptr) {
664e8d8bef9SDimitry Andric   float __tmp;
66569ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
66669ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
66769ade1e0SDimitry Andric #endif
6685ffd83dbSDimitry Andric   *__sinptr = __ocml_sincospi_f32(
6695ffd83dbSDimitry Andric       __x, (__attribute__((address_space(5))) float *)&__tmp);
6705ffd83dbSDimitry Andric   *__cosptr = __tmp;
6715ffd83dbSDimitry Andric }
672e8d8bef9SDimitry Andric 
6735ffd83dbSDimitry Andric __DEVICE__
674*5f757f3fSDimitry Andric float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); }
675e8d8bef9SDimitry Andric 
6765ffd83dbSDimitry Andric __DEVICE__
677e8d8bef9SDimitry Andric float sinhf(float __x) { return __ocml_sinh_f32(__x); }
678e8d8bef9SDimitry Andric 
6795ffd83dbSDimitry Andric __DEVICE__
680e8d8bef9SDimitry Andric float sinpif(float __x) { return __ocml_sinpi_f32(__x); }
681e8d8bef9SDimitry Andric 
6825ffd83dbSDimitry Andric __DEVICE__
683*5f757f3fSDimitry Andric float sqrtf(float __x) { return __builtin_sqrtf(__x); }
684e8d8bef9SDimitry Andric 
6855ffd83dbSDimitry Andric __DEVICE__
686e8d8bef9SDimitry Andric float tanf(float __x) { return __ocml_tan_f32(__x); }
687e8d8bef9SDimitry Andric 
6885ffd83dbSDimitry Andric __DEVICE__
689e8d8bef9SDimitry Andric float tanhf(float __x) { return __ocml_tanh_f32(__x); }
690e8d8bef9SDimitry Andric 
6915ffd83dbSDimitry Andric __DEVICE__
692e8d8bef9SDimitry Andric float tgammaf(float __x) { return __ocml_tgamma_f32(__x); }
693e8d8bef9SDimitry Andric 
6945ffd83dbSDimitry Andric __DEVICE__
69506c3fb27SDimitry Andric float truncf(float __x) { return __builtin_truncf(__x); }
696e8d8bef9SDimitry Andric 
6975ffd83dbSDimitry Andric __DEVICE__
698e8d8bef9SDimitry Andric float y0f(float __x) { return __ocml_y0_f32(__x); }
699e8d8bef9SDimitry Andric 
7005ffd83dbSDimitry Andric __DEVICE__
701e8d8bef9SDimitry Andric float y1f(float __x) { return __ocml_y1_f32(__x); }
702e8d8bef9SDimitry Andric 
7035ffd83dbSDimitry Andric __DEVICE__
704e8d8bef9SDimitry Andric float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication
7055ffd83dbSDimitry Andric                                 // and the Miller & Brown algorithm
7065ffd83dbSDimitry Andric   //       for linear recurrences to get O(log n) steps, but it's unclear if
7075ffd83dbSDimitry Andric   //       it'd be beneficial in this case. Placeholder until OCML adds
7085ffd83dbSDimitry Andric   //       support.
7095ffd83dbSDimitry Andric   if (__n == 0)
7105ffd83dbSDimitry Andric     return y0f(__x);
7115ffd83dbSDimitry Andric   if (__n == 1)
7125ffd83dbSDimitry Andric     return y1f(__x);
7135ffd83dbSDimitry Andric 
7145ffd83dbSDimitry Andric   float __x0 = y0f(__x);
7155ffd83dbSDimitry Andric   float __x1 = y1f(__x);
7165ffd83dbSDimitry Andric   for (int __i = 1; __i < __n; ++__i) {
7175ffd83dbSDimitry Andric     float __x2 = (2 * __i) / __x * __x1 - __x0;
7185ffd83dbSDimitry Andric     __x0 = __x1;
7195ffd83dbSDimitry Andric     __x1 = __x2;
7205ffd83dbSDimitry Andric   }
7215ffd83dbSDimitry Andric 
7225ffd83dbSDimitry Andric   return __x1;
7235ffd83dbSDimitry Andric }
7245ffd83dbSDimitry Andric 
725e8d8bef9SDimitry Andric 
7265ffd83dbSDimitry Andric // END FLOAT
7275ffd83dbSDimitry Andric 
7285ffd83dbSDimitry Andric // BEGIN DOUBLE
7295ffd83dbSDimitry Andric __DEVICE__
730e8d8bef9SDimitry Andric double acos(double __x) { return __ocml_acos_f64(__x); }
731e8d8bef9SDimitry Andric 
7325ffd83dbSDimitry Andric __DEVICE__
733e8d8bef9SDimitry Andric double acosh(double __x) { return __ocml_acosh_f64(__x); }
734e8d8bef9SDimitry Andric 
7355ffd83dbSDimitry Andric __DEVICE__
736e8d8bef9SDimitry Andric double asin(double __x) { return __ocml_asin_f64(__x); }
737e8d8bef9SDimitry Andric 
7385ffd83dbSDimitry Andric __DEVICE__
739e8d8bef9SDimitry Andric double asinh(double __x) { return __ocml_asinh_f64(__x); }
740e8d8bef9SDimitry Andric 
7415ffd83dbSDimitry Andric __DEVICE__
742e8d8bef9SDimitry Andric double atan(double __x) { return __ocml_atan_f64(__x); }
743e8d8bef9SDimitry Andric 
7445ffd83dbSDimitry Andric __DEVICE__
745e8d8bef9SDimitry Andric double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); }
746e8d8bef9SDimitry Andric 
7475ffd83dbSDimitry Andric __DEVICE__
748e8d8bef9SDimitry Andric double atanh(double __x) { return __ocml_atanh_f64(__x); }
749e8d8bef9SDimitry Andric 
7505ffd83dbSDimitry Andric __DEVICE__
751e8d8bef9SDimitry Andric double cbrt(double __x) { return __ocml_cbrt_f64(__x); }
752e8d8bef9SDimitry Andric 
7535ffd83dbSDimitry Andric __DEVICE__
75406c3fb27SDimitry Andric double ceil(double __x) { return __builtin_ceil(__x); }
755e8d8bef9SDimitry Andric 
7565ffd83dbSDimitry Andric __DEVICE__
757e8d8bef9SDimitry Andric double copysign(double __x, double __y) {
75806c3fb27SDimitry Andric   return __builtin_copysign(__x, __y);
7595ffd83dbSDimitry Andric }
760e8d8bef9SDimitry Andric 
7615ffd83dbSDimitry Andric __DEVICE__
762e8d8bef9SDimitry Andric double cos(double __x) { return __ocml_cos_f64(__x); }
763e8d8bef9SDimitry Andric 
7645ffd83dbSDimitry Andric __DEVICE__
765e8d8bef9SDimitry Andric double cosh(double __x) { return __ocml_cosh_f64(__x); }
766e8d8bef9SDimitry Andric 
7675ffd83dbSDimitry Andric __DEVICE__
768e8d8bef9SDimitry Andric double cospi(double __x) { return __ocml_cospi_f64(__x); }
769e8d8bef9SDimitry Andric 
7705ffd83dbSDimitry Andric __DEVICE__
771e8d8bef9SDimitry Andric double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); }
772e8d8bef9SDimitry Andric 
7735ffd83dbSDimitry Andric __DEVICE__
774e8d8bef9SDimitry Andric double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); }
775e8d8bef9SDimitry Andric 
7765ffd83dbSDimitry Andric __DEVICE__
777e8d8bef9SDimitry Andric double erf(double __x) { return __ocml_erf_f64(__x); }
778e8d8bef9SDimitry Andric 
7795ffd83dbSDimitry Andric __DEVICE__
780e8d8bef9SDimitry Andric double erfc(double __x) { return __ocml_erfc_f64(__x); }
781e8d8bef9SDimitry Andric 
7825ffd83dbSDimitry Andric __DEVICE__
783e8d8bef9SDimitry Andric double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); }
784e8d8bef9SDimitry Andric 
7855ffd83dbSDimitry Andric __DEVICE__
786e8d8bef9SDimitry Andric double erfcx(double __x) { return __ocml_erfcx_f64(__x); }
787e8d8bef9SDimitry Andric 
7885ffd83dbSDimitry Andric __DEVICE__
789e8d8bef9SDimitry Andric double erfinv(double __x) { return __ocml_erfinv_f64(__x); }
790e8d8bef9SDimitry Andric 
7915ffd83dbSDimitry Andric __DEVICE__
792e8d8bef9SDimitry Andric double exp(double __x) { return __ocml_exp_f64(__x); }
793e8d8bef9SDimitry Andric 
7945ffd83dbSDimitry Andric __DEVICE__
795e8d8bef9SDimitry Andric double exp10(double __x) { return __ocml_exp10_f64(__x); }
796e8d8bef9SDimitry Andric 
7975ffd83dbSDimitry Andric __DEVICE__
798e8d8bef9SDimitry Andric double exp2(double __x) { return __ocml_exp2_f64(__x); }
799e8d8bef9SDimitry Andric 
8005ffd83dbSDimitry Andric __DEVICE__
801e8d8bef9SDimitry Andric double expm1(double __x) { return __ocml_expm1_f64(__x); }
802e8d8bef9SDimitry Andric 
8035ffd83dbSDimitry Andric __DEVICE__
804bdd1243dSDimitry Andric double fabs(double __x) { return __builtin_fabs(__x); }
805e8d8bef9SDimitry Andric 
8065ffd83dbSDimitry Andric __DEVICE__
807e8d8bef9SDimitry Andric double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); }
808e8d8bef9SDimitry Andric 
8095ffd83dbSDimitry Andric __DEVICE__
81006c3fb27SDimitry Andric double floor(double __x) { return __builtin_floor(__x); }
811e8d8bef9SDimitry Andric 
8125ffd83dbSDimitry Andric __DEVICE__
813e8d8bef9SDimitry Andric double fma(double __x, double __y, double __z) {
81406c3fb27SDimitry Andric   return __builtin_fma(__x, __y, __z);
8155ffd83dbSDimitry Andric }
816e8d8bef9SDimitry Andric 
8175ffd83dbSDimitry Andric __DEVICE__
81806c3fb27SDimitry Andric double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); }
819e8d8bef9SDimitry Andric 
8205ffd83dbSDimitry Andric __DEVICE__
82106c3fb27SDimitry Andric double fmin(double __x, double __y) { return __builtin_fmin(__x, __y); }
822e8d8bef9SDimitry Andric 
8235ffd83dbSDimitry Andric __DEVICE__
824e8d8bef9SDimitry Andric double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); }
825e8d8bef9SDimitry Andric 
8265ffd83dbSDimitry Andric __DEVICE__
827e8d8bef9SDimitry Andric double frexp(double __x, int *__nptr) {
82806c3fb27SDimitry Andric   return __builtin_frexp(__x, __nptr);
8295ffd83dbSDimitry Andric }
830e8d8bef9SDimitry Andric 
8315ffd83dbSDimitry Andric __DEVICE__
832e8d8bef9SDimitry Andric double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); }
833e8d8bef9SDimitry Andric 
8345ffd83dbSDimitry Andric __DEVICE__
835e8d8bef9SDimitry Andric int ilogb(double __x) { return __ocml_ilogb_f64(__x); }
836e8d8bef9SDimitry Andric 
8375ffd83dbSDimitry Andric __DEVICE__
83806c3fb27SDimitry Andric __RETURN_TYPE __finite(double __x) { return __builtin_isfinite(__x); }
839e8d8bef9SDimitry Andric 
8405ffd83dbSDimitry Andric __DEVICE__
84106c3fb27SDimitry Andric __RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); }
842e8d8bef9SDimitry Andric 
8435ffd83dbSDimitry Andric __DEVICE__
84406c3fb27SDimitry Andric __RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); }
845e8d8bef9SDimitry Andric 
8465ffd83dbSDimitry Andric __DEVICE__
847e8d8bef9SDimitry Andric double j0(double __x) { return __ocml_j0_f64(__x); }
848e8d8bef9SDimitry Andric 
8495ffd83dbSDimitry Andric __DEVICE__
850e8d8bef9SDimitry Andric double j1(double __x) { return __ocml_j1_f64(__x); }
851e8d8bef9SDimitry Andric 
8525ffd83dbSDimitry Andric __DEVICE__
853e8d8bef9SDimitry Andric double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication
8545ffd83dbSDimitry Andric                                  // and the Miller & Brown algorithm
8555ffd83dbSDimitry Andric   //       for linear recurrences to get O(log n) steps, but it's unclear if
8565ffd83dbSDimitry Andric   //       it'd be beneficial in this case. Placeholder until OCML adds
8575ffd83dbSDimitry Andric   //       support.
8585ffd83dbSDimitry Andric   if (__n == 0)
859e8d8bef9SDimitry Andric     return j0(__x);
8605ffd83dbSDimitry Andric   if (__n == 1)
861e8d8bef9SDimitry Andric     return j1(__x);
8625ffd83dbSDimitry Andric 
863e8d8bef9SDimitry Andric   double __x0 = j0(__x);
864e8d8bef9SDimitry Andric   double __x1 = j1(__x);
8655ffd83dbSDimitry Andric   for (int __i = 1; __i < __n; ++__i) {
8665ffd83dbSDimitry Andric     double __x2 = (2 * __i) / __x * __x1 - __x0;
8675ffd83dbSDimitry Andric     __x0 = __x1;
8685ffd83dbSDimitry Andric     __x1 = __x2;
8695ffd83dbSDimitry Andric   }
8705ffd83dbSDimitry Andric   return __x1;
8715ffd83dbSDimitry Andric }
872e8d8bef9SDimitry Andric 
8735ffd83dbSDimitry Andric __DEVICE__
87406c3fb27SDimitry Andric double ldexp(double __x, int __e) { return __builtin_amdgcn_ldexp(__x, __e); }
875e8d8bef9SDimitry Andric 
8765ffd83dbSDimitry Andric __DEVICE__
877e8d8bef9SDimitry Andric double lgamma(double __x) { return __ocml_lgamma_f64(__x); }
878e8d8bef9SDimitry Andric 
8795ffd83dbSDimitry Andric __DEVICE__
88006c3fb27SDimitry Andric long long int llrint(double __x) { return __builtin_rint(__x); }
881e8d8bef9SDimitry Andric 
8825ffd83dbSDimitry Andric __DEVICE__
88306c3fb27SDimitry Andric long long int llround(double __x) { return __builtin_round(__x); }
884e8d8bef9SDimitry Andric 
8855ffd83dbSDimitry Andric __DEVICE__
886e8d8bef9SDimitry Andric double log(double __x) { return __ocml_log_f64(__x); }
887e8d8bef9SDimitry Andric 
8885ffd83dbSDimitry Andric __DEVICE__
889e8d8bef9SDimitry Andric double log10(double __x) { return __ocml_log10_f64(__x); }
890e8d8bef9SDimitry Andric 
8915ffd83dbSDimitry Andric __DEVICE__
892e8d8bef9SDimitry Andric double log1p(double __x) { return __ocml_log1p_f64(__x); }
893e8d8bef9SDimitry Andric 
8945ffd83dbSDimitry Andric __DEVICE__
895e8d8bef9SDimitry Andric double log2(double __x) { return __ocml_log2_f64(__x); }
896e8d8bef9SDimitry Andric 
8975ffd83dbSDimitry Andric __DEVICE__
898e8d8bef9SDimitry Andric double logb(double __x) { return __ocml_logb_f64(__x); }
899e8d8bef9SDimitry Andric 
9005ffd83dbSDimitry Andric __DEVICE__
90106c3fb27SDimitry Andric long int lrint(double __x) { return __builtin_rint(__x); }
902e8d8bef9SDimitry Andric 
9035ffd83dbSDimitry Andric __DEVICE__
90406c3fb27SDimitry Andric long int lround(double __x) { return __builtin_round(__x); }
905e8d8bef9SDimitry Andric 
9065ffd83dbSDimitry Andric __DEVICE__
907e8d8bef9SDimitry Andric double modf(double __x, double *__iptr) {
9085ffd83dbSDimitry Andric   double __tmp;
90969ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
91069ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
91169ade1e0SDimitry Andric #endif
9125ffd83dbSDimitry Andric   double __r =
9135ffd83dbSDimitry Andric       __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp);
9145ffd83dbSDimitry Andric   *__iptr = __tmp;
9155ffd83dbSDimitry Andric 
9165ffd83dbSDimitry Andric   return __r;
9175ffd83dbSDimitry Andric }
918e8d8bef9SDimitry Andric 
9195ffd83dbSDimitry Andric __DEVICE__
920e8d8bef9SDimitry Andric double nan(const char *__tagp) {
9215ffd83dbSDimitry Andric #if !_WIN32
9225ffd83dbSDimitry Andric   union {
9235ffd83dbSDimitry Andric     double val;
9245ffd83dbSDimitry Andric     struct ieee_double {
9255ffd83dbSDimitry Andric       uint64_t mantissa : 51;
9265ffd83dbSDimitry Andric       uint32_t quiet : 1;
9275ffd83dbSDimitry Andric       uint32_t exponent : 11;
9285ffd83dbSDimitry Andric       uint32_t sign : 1;
9295ffd83dbSDimitry Andric     } bits;
9305ffd83dbSDimitry Andric   } __tmp;
931e8d8bef9SDimitry Andric   __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits));
9325ffd83dbSDimitry Andric 
9335ffd83dbSDimitry Andric   __tmp.bits.sign = 0u;
9345ffd83dbSDimitry Andric   __tmp.bits.exponent = ~0u;
9355ffd83dbSDimitry Andric   __tmp.bits.quiet = 1u;
9365ffd83dbSDimitry Andric   __tmp.bits.mantissa = __make_mantissa(__tagp);
9375ffd83dbSDimitry Andric 
9385ffd83dbSDimitry Andric   return __tmp.val;
9395ffd83dbSDimitry Andric #else
940e8d8bef9SDimitry Andric   __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double));
941e8d8bef9SDimitry Andric   uint64_t __val = __make_mantissa(__tagp);
942e8d8bef9SDimitry Andric   __val |= 0xFFF << 51;
943e8d8bef9SDimitry Andric   return *reinterpret_cast<double *>(&__val);
9445ffd83dbSDimitry Andric #endif
9455ffd83dbSDimitry Andric }
946e8d8bef9SDimitry Andric 
9475ffd83dbSDimitry Andric __DEVICE__
94806c3fb27SDimitry Andric double nearbyint(double __x) { return __builtin_nearbyint(__x); }
949e8d8bef9SDimitry Andric 
9505ffd83dbSDimitry Andric __DEVICE__
951e8d8bef9SDimitry Andric double nextafter(double __x, double __y) {
9525ffd83dbSDimitry Andric   return __ocml_nextafter_f64(__x, __y);
9535ffd83dbSDimitry Andric }
954e8d8bef9SDimitry Andric 
9555ffd83dbSDimitry Andric __DEVICE__
956e8d8bef9SDimitry Andric double norm(int __dim,
9575ffd83dbSDimitry Andric             const double *__a) { // TODO: placeholder until OCML adds support.
9585ffd83dbSDimitry Andric   double __r = 0;
9595ffd83dbSDimitry Andric   while (__dim--) {
9605ffd83dbSDimitry Andric     __r += __a[0] * __a[0];
9615ffd83dbSDimitry Andric     ++__a;
9625ffd83dbSDimitry Andric   }
9635ffd83dbSDimitry Andric 
964*5f757f3fSDimitry Andric   return __builtin_sqrt(__r);
9655ffd83dbSDimitry Andric }
966e8d8bef9SDimitry Andric 
9675ffd83dbSDimitry Andric __DEVICE__
968e8d8bef9SDimitry Andric double norm3d(double __x, double __y, double __z) {
9695ffd83dbSDimitry Andric   return __ocml_len3_f64(__x, __y, __z);
9705ffd83dbSDimitry Andric }
971e8d8bef9SDimitry Andric 
9725ffd83dbSDimitry Andric __DEVICE__
973e8d8bef9SDimitry Andric double norm4d(double __x, double __y, double __z, double __w) {
9745ffd83dbSDimitry Andric   return __ocml_len4_f64(__x, __y, __z, __w);
9755ffd83dbSDimitry Andric }
976e8d8bef9SDimitry Andric 
9775ffd83dbSDimitry Andric __DEVICE__
978e8d8bef9SDimitry Andric double normcdf(double __x) { return __ocml_ncdf_f64(__x); }
979e8d8bef9SDimitry Andric 
9805ffd83dbSDimitry Andric __DEVICE__
981e8d8bef9SDimitry Andric double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); }
982e8d8bef9SDimitry Andric 
9835ffd83dbSDimitry Andric __DEVICE__
984e8d8bef9SDimitry Andric double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); }
985e8d8bef9SDimitry Andric 
9865ffd83dbSDimitry Andric __DEVICE__
987e8d8bef9SDimitry Andric double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); }
988e8d8bef9SDimitry Andric 
9895ffd83dbSDimitry Andric __DEVICE__
990e8d8bef9SDimitry Andric double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); }
991e8d8bef9SDimitry Andric 
992e8d8bef9SDimitry Andric __DEVICE__
993e8d8bef9SDimitry Andric double remainder(double __x, double __y) {
9945ffd83dbSDimitry Andric   return __ocml_remainder_f64(__x, __y);
9955ffd83dbSDimitry Andric }
996e8d8bef9SDimitry Andric 
9975ffd83dbSDimitry Andric __DEVICE__
998e8d8bef9SDimitry Andric double remquo(double __x, double __y, int *__quo) {
9995ffd83dbSDimitry Andric   int __tmp;
100069ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
100169ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
100269ade1e0SDimitry Andric #endif
10035ffd83dbSDimitry Andric   double __r = __ocml_remquo_f64(
10045ffd83dbSDimitry Andric       __x, __y, (__attribute__((address_space(5))) int *)&__tmp);
10055ffd83dbSDimitry Andric   *__quo = __tmp;
10065ffd83dbSDimitry Andric 
10075ffd83dbSDimitry Andric   return __r;
10085ffd83dbSDimitry Andric }
1009e8d8bef9SDimitry Andric 
10105ffd83dbSDimitry Andric __DEVICE__
1011e8d8bef9SDimitry Andric double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); }
1012e8d8bef9SDimitry Andric 
10135ffd83dbSDimitry Andric __DEVICE__
101406c3fb27SDimitry Andric double rint(double __x) { return __builtin_rint(__x); }
1015e8d8bef9SDimitry Andric 
10165ffd83dbSDimitry Andric __DEVICE__
1017e8d8bef9SDimitry Andric double rnorm(int __dim,
10185ffd83dbSDimitry Andric              const double *__a) { // TODO: placeholder until OCML adds support.
10195ffd83dbSDimitry Andric   double __r = 0;
10205ffd83dbSDimitry Andric   while (__dim--) {
10215ffd83dbSDimitry Andric     __r += __a[0] * __a[0];
10225ffd83dbSDimitry Andric     ++__a;
10235ffd83dbSDimitry Andric   }
10245ffd83dbSDimitry Andric 
10255ffd83dbSDimitry Andric   return __ocml_rsqrt_f64(__r);
10265ffd83dbSDimitry Andric }
1027e8d8bef9SDimitry Andric 
10285ffd83dbSDimitry Andric __DEVICE__
1029e8d8bef9SDimitry Andric double rnorm3d(double __x, double __y, double __z) {
10305ffd83dbSDimitry Andric   return __ocml_rlen3_f64(__x, __y, __z);
10315ffd83dbSDimitry Andric }
1032e8d8bef9SDimitry Andric 
10335ffd83dbSDimitry Andric __DEVICE__
1034e8d8bef9SDimitry Andric double rnorm4d(double __x, double __y, double __z, double __w) {
10355ffd83dbSDimitry Andric   return __ocml_rlen4_f64(__x, __y, __z, __w);
10365ffd83dbSDimitry Andric }
1037e8d8bef9SDimitry Andric 
10385ffd83dbSDimitry Andric __DEVICE__
103906c3fb27SDimitry Andric double round(double __x) { return __builtin_round(__x); }
1040e8d8bef9SDimitry Andric 
10415ffd83dbSDimitry Andric __DEVICE__
1042e8d8bef9SDimitry Andric double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); }
1043e8d8bef9SDimitry Andric 
10445ffd83dbSDimitry Andric __DEVICE__
1045e8d8bef9SDimitry Andric double scalbln(double __x, long int __n) {
104606c3fb27SDimitry Andric   return (__n < INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n)
10475ffd83dbSDimitry Andric                          : __ocml_scalb_f64(__x, __n);
10485ffd83dbSDimitry Andric }
10495ffd83dbSDimitry Andric __DEVICE__
105006c3fb27SDimitry Andric double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); }
1051e8d8bef9SDimitry Andric 
10525ffd83dbSDimitry Andric __DEVICE__
105306c3fb27SDimitry Andric __RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); }
1054e8d8bef9SDimitry Andric 
10555ffd83dbSDimitry Andric __DEVICE__
1056e8d8bef9SDimitry Andric double sin(double __x) { return __ocml_sin_f64(__x); }
1057e8d8bef9SDimitry Andric 
10585ffd83dbSDimitry Andric __DEVICE__
1059e8d8bef9SDimitry Andric void sincos(double __x, double *__sinptr, double *__cosptr) {
10605ffd83dbSDimitry Andric   double __tmp;
106169ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
106269ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
106369ade1e0SDimitry Andric #endif
10645ffd83dbSDimitry Andric   *__sinptr = __ocml_sincos_f64(
10655ffd83dbSDimitry Andric       __x, (__attribute__((address_space(5))) double *)&__tmp);
10665ffd83dbSDimitry Andric   *__cosptr = __tmp;
10675ffd83dbSDimitry Andric }
1068e8d8bef9SDimitry Andric 
10695ffd83dbSDimitry Andric __DEVICE__
1070e8d8bef9SDimitry Andric void sincospi(double __x, double *__sinptr, double *__cosptr) {
10715ffd83dbSDimitry Andric   double __tmp;
107269ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__
107369ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc)
107469ade1e0SDimitry Andric #endif
10755ffd83dbSDimitry Andric   *__sinptr = __ocml_sincospi_f64(
10765ffd83dbSDimitry Andric       __x, (__attribute__((address_space(5))) double *)&__tmp);
10775ffd83dbSDimitry Andric   *__cosptr = __tmp;
10785ffd83dbSDimitry Andric }
1079e8d8bef9SDimitry Andric 
10805ffd83dbSDimitry Andric __DEVICE__
1081e8d8bef9SDimitry Andric double sinh(double __x) { return __ocml_sinh_f64(__x); }
1082e8d8bef9SDimitry Andric 
10835ffd83dbSDimitry Andric __DEVICE__
1084e8d8bef9SDimitry Andric double sinpi(double __x) { return __ocml_sinpi_f64(__x); }
1085e8d8bef9SDimitry Andric 
10865ffd83dbSDimitry Andric __DEVICE__
1087*5f757f3fSDimitry Andric double sqrt(double __x) { return __builtin_sqrt(__x); }
1088e8d8bef9SDimitry Andric 
10895ffd83dbSDimitry Andric __DEVICE__
1090e8d8bef9SDimitry Andric double tan(double __x) { return __ocml_tan_f64(__x); }
1091e8d8bef9SDimitry Andric 
10925ffd83dbSDimitry Andric __DEVICE__
1093e8d8bef9SDimitry Andric double tanh(double __x) { return __ocml_tanh_f64(__x); }
1094e8d8bef9SDimitry Andric 
10955ffd83dbSDimitry Andric __DEVICE__
1096e8d8bef9SDimitry Andric double tgamma(double __x) { return __ocml_tgamma_f64(__x); }
1097e8d8bef9SDimitry Andric 
10985ffd83dbSDimitry Andric __DEVICE__
109906c3fb27SDimitry Andric double trunc(double __x) { return __builtin_trunc(__x); }
1100e8d8bef9SDimitry Andric 
11015ffd83dbSDimitry Andric __DEVICE__
1102e8d8bef9SDimitry Andric double y0(double __x) { return __ocml_y0_f64(__x); }
1103e8d8bef9SDimitry Andric 
11045ffd83dbSDimitry Andric __DEVICE__
1105e8d8bef9SDimitry Andric double y1(double __x) { return __ocml_y1_f64(__x); }
1106e8d8bef9SDimitry Andric 
11075ffd83dbSDimitry Andric __DEVICE__
1108e8d8bef9SDimitry Andric double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication
11095ffd83dbSDimitry Andric                                  // and the Miller & Brown algorithm
11105ffd83dbSDimitry Andric   //       for linear recurrences to get O(log n) steps, but it's unclear if
11115ffd83dbSDimitry Andric   //       it'd be beneficial in this case. Placeholder until OCML adds
11125ffd83dbSDimitry Andric   //       support.
11135ffd83dbSDimitry Andric   if (__n == 0)
1114e8d8bef9SDimitry Andric     return y0(__x);
11155ffd83dbSDimitry Andric   if (__n == 1)
1116e8d8bef9SDimitry Andric     return y1(__x);
11175ffd83dbSDimitry Andric 
1118e8d8bef9SDimitry Andric   double __x0 = y0(__x);
1119e8d8bef9SDimitry Andric   double __x1 = y1(__x);
11205ffd83dbSDimitry Andric   for (int __i = 1; __i < __n; ++__i) {
11215ffd83dbSDimitry Andric     double __x2 = (2 * __i) / __x * __x1 - __x0;
11225ffd83dbSDimitry Andric     __x0 = __x1;
11235ffd83dbSDimitry Andric     __x1 = __x2;
11245ffd83dbSDimitry Andric   }
11255ffd83dbSDimitry Andric 
11265ffd83dbSDimitry Andric   return __x1;
11275ffd83dbSDimitry Andric }
11285ffd83dbSDimitry Andric 
11295ffd83dbSDimitry Andric // BEGIN INTRINSICS
11305ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
11315ffd83dbSDimitry Andric __DEVICE__
1132e8d8bef9SDimitry Andric double __dadd_rd(double __x, double __y) {
11335ffd83dbSDimitry Andric   return __ocml_add_rtn_f64(__x, __y);
11345ffd83dbSDimitry Andric }
11355ffd83dbSDimitry Andric __DEVICE__
1136e8d8bef9SDimitry Andric double __dadd_rn(double __x, double __y) {
1137e8d8bef9SDimitry Andric   return __ocml_add_rte_f64(__x, __y);
1138e8d8bef9SDimitry Andric }
11395ffd83dbSDimitry Andric __DEVICE__
1140e8d8bef9SDimitry Andric double __dadd_ru(double __x, double __y) {
11415ffd83dbSDimitry Andric   return __ocml_add_rtp_f64(__x, __y);
11425ffd83dbSDimitry Andric }
11435ffd83dbSDimitry Andric __DEVICE__
1144e8d8bef9SDimitry Andric double __dadd_rz(double __x, double __y) {
11455ffd83dbSDimitry Andric   return __ocml_add_rtz_f64(__x, __y);
11465ffd83dbSDimitry Andric }
1147e8d8bef9SDimitry Andric #else
11485ffd83dbSDimitry Andric __DEVICE__
1149e8d8bef9SDimitry Andric double __dadd_rn(double __x, double __y) { return __x + __y; }
11505ffd83dbSDimitry Andric #endif
1151e8d8bef9SDimitry Andric 
11525ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
11535ffd83dbSDimitry Andric __DEVICE__
1154e8d8bef9SDimitry Andric double __ddiv_rd(double __x, double __y) {
1155e8d8bef9SDimitry Andric   return __ocml_div_rtn_f64(__x, __y);
1156e8d8bef9SDimitry Andric }
1157e8d8bef9SDimitry Andric __DEVICE__
1158e8d8bef9SDimitry Andric double __ddiv_rn(double __x, double __y) {
1159e8d8bef9SDimitry Andric   return __ocml_div_rte_f64(__x, __y);
1160e8d8bef9SDimitry Andric }
1161e8d8bef9SDimitry Andric __DEVICE__
1162e8d8bef9SDimitry Andric double __ddiv_ru(double __x, double __y) {
11635ffd83dbSDimitry Andric   return __ocml_div_rtp_f64(__x, __y);
11645ffd83dbSDimitry Andric }
11655ffd83dbSDimitry Andric __DEVICE__
1166e8d8bef9SDimitry Andric double __ddiv_rz(double __x, double __y) {
11675ffd83dbSDimitry Andric   return __ocml_div_rtz_f64(__x, __y);
11685ffd83dbSDimitry Andric }
1169e8d8bef9SDimitry Andric #else
11705ffd83dbSDimitry Andric __DEVICE__
1171e8d8bef9SDimitry Andric double __ddiv_rn(double __x, double __y) { return __x / __y; }
11725ffd83dbSDimitry Andric #endif
1173e8d8bef9SDimitry Andric 
11745ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
11755ffd83dbSDimitry Andric __DEVICE__
1176e8d8bef9SDimitry Andric double __dmul_rd(double __x, double __y) {
1177e8d8bef9SDimitry Andric   return __ocml_mul_rtn_f64(__x, __y);
1178e8d8bef9SDimitry Andric }
1179e8d8bef9SDimitry Andric __DEVICE__
1180e8d8bef9SDimitry Andric double __dmul_rn(double __x, double __y) {
1181e8d8bef9SDimitry Andric   return __ocml_mul_rte_f64(__x, __y);
1182e8d8bef9SDimitry Andric }
1183e8d8bef9SDimitry Andric __DEVICE__
1184e8d8bef9SDimitry Andric double __dmul_ru(double __x, double __y) {
11855ffd83dbSDimitry Andric   return __ocml_mul_rtp_f64(__x, __y);
11865ffd83dbSDimitry Andric }
11875ffd83dbSDimitry Andric __DEVICE__
1188e8d8bef9SDimitry Andric double __dmul_rz(double __x, double __y) {
11895ffd83dbSDimitry Andric   return __ocml_mul_rtz_f64(__x, __y);
11905ffd83dbSDimitry Andric }
1191e8d8bef9SDimitry Andric #else
11925ffd83dbSDimitry Andric __DEVICE__
1193e8d8bef9SDimitry Andric double __dmul_rn(double __x, double __y) { return __x * __y; }
11945ffd83dbSDimitry Andric #endif
1195e8d8bef9SDimitry Andric 
11965ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
11975ffd83dbSDimitry Andric __DEVICE__
1198e8d8bef9SDimitry Andric double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); }
11995ffd83dbSDimitry Andric __DEVICE__
1200e8d8bef9SDimitry Andric double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); }
12015ffd83dbSDimitry Andric __DEVICE__
1202e8d8bef9SDimitry Andric double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); }
1203e8d8bef9SDimitry Andric __DEVICE__
1204e8d8bef9SDimitry Andric double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); }
1205e8d8bef9SDimitry Andric #else
1206e8d8bef9SDimitry Andric __DEVICE__
1207e8d8bef9SDimitry Andric double __drcp_rn(double __x) { return 1.0 / __x; }
12085ffd83dbSDimitry Andric #endif
1209e8d8bef9SDimitry Andric 
12105ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
12115ffd83dbSDimitry Andric __DEVICE__
1212e8d8bef9SDimitry Andric double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); }
12135ffd83dbSDimitry Andric __DEVICE__
1214e8d8bef9SDimitry Andric double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); }
12155ffd83dbSDimitry Andric __DEVICE__
1216e8d8bef9SDimitry Andric double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); }
1217e8d8bef9SDimitry Andric __DEVICE__
1218e8d8bef9SDimitry Andric double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); }
1219e8d8bef9SDimitry Andric #else
1220e8d8bef9SDimitry Andric __DEVICE__
1221*5f757f3fSDimitry Andric double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); }
1222e8d8bef9SDimitry Andric #endif
1223e8d8bef9SDimitry Andric 
1224e8d8bef9SDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
1225e8d8bef9SDimitry Andric __DEVICE__
1226e8d8bef9SDimitry Andric double __dsub_rd(double __x, double __y) {
12275ffd83dbSDimitry Andric   return __ocml_sub_rtn_f64(__x, __y);
12285ffd83dbSDimitry Andric }
12295ffd83dbSDimitry Andric __DEVICE__
1230e8d8bef9SDimitry Andric double __dsub_rn(double __x, double __y) {
1231e8d8bef9SDimitry Andric   return __ocml_sub_rte_f64(__x, __y);
1232e8d8bef9SDimitry Andric }
12335ffd83dbSDimitry Andric __DEVICE__
1234e8d8bef9SDimitry Andric double __dsub_ru(double __x, double __y) {
12355ffd83dbSDimitry Andric   return __ocml_sub_rtp_f64(__x, __y);
12365ffd83dbSDimitry Andric }
12375ffd83dbSDimitry Andric __DEVICE__
1238e8d8bef9SDimitry Andric double __dsub_rz(double __x, double __y) {
12395ffd83dbSDimitry Andric   return __ocml_sub_rtz_f64(__x, __y);
12405ffd83dbSDimitry Andric }
1241e8d8bef9SDimitry Andric #else
12425ffd83dbSDimitry Andric __DEVICE__
1243e8d8bef9SDimitry Andric double __dsub_rn(double __x, double __y) { return __x - __y; }
12445ffd83dbSDimitry Andric #endif
1245e8d8bef9SDimitry Andric 
12465ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS
12475ffd83dbSDimitry Andric __DEVICE__
1248e8d8bef9SDimitry Andric double __fma_rd(double __x, double __y, double __z) {
1249e8d8bef9SDimitry Andric   return __ocml_fma_rtn_f64(__x, __y, __z);
1250e8d8bef9SDimitry Andric }
1251e8d8bef9SDimitry Andric __DEVICE__
1252e8d8bef9SDimitry Andric double __fma_rn(double __x, double __y, double __z) {
1253e8d8bef9SDimitry Andric   return __ocml_fma_rte_f64(__x, __y, __z);
1254e8d8bef9SDimitry Andric }
1255e8d8bef9SDimitry Andric __DEVICE__
1256e8d8bef9SDimitry Andric double __fma_ru(double __x, double __y, double __z) {
12575ffd83dbSDimitry Andric   return __ocml_fma_rtp_f64(__x, __y, __z);
12585ffd83dbSDimitry Andric }
12595ffd83dbSDimitry Andric __DEVICE__
1260e8d8bef9SDimitry Andric double __fma_rz(double __x, double __y, double __z) {
12615ffd83dbSDimitry Andric   return __ocml_fma_rtz_f64(__x, __y, __z);
12625ffd83dbSDimitry Andric }
1263e8d8bef9SDimitry Andric #else
1264e8d8bef9SDimitry Andric __DEVICE__
1265e8d8bef9SDimitry Andric double __fma_rn(double __x, double __y, double __z) {
126606c3fb27SDimitry Andric   return __builtin_fma(__x, __y, __z);
1267e8d8bef9SDimitry Andric }
12685ffd83dbSDimitry Andric #endif
12695ffd83dbSDimitry Andric // END INTRINSICS
12705ffd83dbSDimitry Andric // END DOUBLE
12715ffd83dbSDimitry Andric 
1272e8d8bef9SDimitry Andric // C only macros
1273e8d8bef9SDimitry Andric #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
1274e8d8bef9SDimitry Andric #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x)
1275e8d8bef9SDimitry Andric #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x)
1276e8d8bef9SDimitry Andric #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x)
1277e8d8bef9SDimitry Andric #define signbit(__x)                                                           \
1278e8d8bef9SDimitry Andric   _Generic((__x), float : __signbitf, double : __signbit)(__x)
1279e8d8bef9SDimitry Andric #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L
12805ffd83dbSDimitry Andric 
12815ffd83dbSDimitry Andric #if defined(__cplusplus)
1282e8d8bef9SDimitry Andric template <class T> __DEVICE__ T min(T __arg1, T __arg2) {
12835ffd83dbSDimitry Andric   return (__arg1 < __arg2) ? __arg1 : __arg2;
12845ffd83dbSDimitry Andric }
12855ffd83dbSDimitry Andric 
1286e8d8bef9SDimitry Andric template <class T> __DEVICE__ T max(T __arg1, T __arg2) {
12875ffd83dbSDimitry Andric   return (__arg1 > __arg2) ? __arg1 : __arg2;
12885ffd83dbSDimitry Andric }
12895ffd83dbSDimitry Andric 
1290e8d8bef9SDimitry Andric __DEVICE__ int min(int __arg1, int __arg2) {
12915ffd83dbSDimitry Andric   return (__arg1 < __arg2) ? __arg1 : __arg2;
12925ffd83dbSDimitry Andric }
1293e8d8bef9SDimitry Andric __DEVICE__ int max(int __arg1, int __arg2) {
12945ffd83dbSDimitry Andric   return (__arg1 > __arg2) ? __arg1 : __arg2;
12955ffd83dbSDimitry Andric }
12965ffd83dbSDimitry Andric 
12975ffd83dbSDimitry Andric __DEVICE__
129806c3fb27SDimitry Andric float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); }
12995ffd83dbSDimitry Andric 
13005ffd83dbSDimitry Andric __DEVICE__
130106c3fb27SDimitry Andric double max(double __x, double __y) { return __builtin_fmax(__x, __y); }
13025ffd83dbSDimitry Andric 
13035ffd83dbSDimitry Andric __DEVICE__
130406c3fb27SDimitry Andric float min(float __x, float __y) { return __builtin_fminf(__x, __y); }
13055ffd83dbSDimitry Andric 
13065ffd83dbSDimitry Andric __DEVICE__
130706c3fb27SDimitry Andric double min(double __x, double __y) { return __builtin_fmin(__x, __y); }
13085ffd83dbSDimitry Andric 
130969ade1e0SDimitry Andric #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
13105ffd83dbSDimitry Andric __host__ inline static int min(int __arg1, int __arg2) {
1311*5f757f3fSDimitry Andric   return __arg1 < __arg2 ? __arg1 : __arg2;
13125ffd83dbSDimitry Andric }
13135ffd83dbSDimitry Andric 
13145ffd83dbSDimitry Andric __host__ inline static int max(int __arg1, int __arg2) {
1315*5f757f3fSDimitry Andric   return __arg1 > __arg2 ? __arg1 : __arg2;
13165ffd83dbSDimitry Andric }
131769ade1e0SDimitry Andric #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__)
1318e8d8bef9SDimitry Andric #endif
13195ffd83dbSDimitry Andric 
13205ffd83dbSDimitry Andric #pragma pop_macro("__DEVICE__")
13215ffd83dbSDimitry Andric #pragma pop_macro("__RETURN_TYPE")
1322*5f757f3fSDimitry Andric #pragma pop_macro("__FAST_OR_SLOW")
13235ffd83dbSDimitry Andric 
13245ffd83dbSDimitry Andric #endif // __CLANG_HIP_MATH_H__
1325