1e8d8bef9SDimitry Andric /*===---- __clang_hip_math.h - Device-side HIP math support ----------------=== 25ffd83dbSDimitry Andric * 35ffd83dbSDimitry Andric * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 45ffd83dbSDimitry Andric * See https://llvm.org/LICENSE.txt for license information. 55ffd83dbSDimitry Andric * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 65ffd83dbSDimitry Andric * 75ffd83dbSDimitry Andric *===-----------------------------------------------------------------------=== 85ffd83dbSDimitry Andric */ 95ffd83dbSDimitry Andric #ifndef __CLANG_HIP_MATH_H__ 105ffd83dbSDimitry Andric #define __CLANG_HIP_MATH_H__ 115ffd83dbSDimitry Andric 1269ade1e0SDimitry Andric #if !defined(__HIP__) && !defined(__OPENMP_AMDGCN__) 13e8d8bef9SDimitry Andric #error "This file is for HIP and OpenMP AMDGCN device compilation only." 14e8d8bef9SDimitry Andric #endif 15e8d8bef9SDimitry Andric 16fe6060f1SDimitry Andric #if !defined(__HIPCC_RTC__) 175ffd83dbSDimitry Andric #include <limits.h> 185ffd83dbSDimitry Andric #include <stdint.h> 1969ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 2069ade1e0SDimitry Andric #include <omp.h> 2169ade1e0SDimitry Andric #endif 2269ade1e0SDimitry Andric #endif // !defined(__HIPCC_RTC__) 235ffd83dbSDimitry Andric 245ffd83dbSDimitry Andric #pragma push_macro("__DEVICE__") 2569ade1e0SDimitry Andric 2669ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 2769ade1e0SDimitry Andric #define __DEVICE__ static inline __attribute__((always_inline, nothrow)) 2869ade1e0SDimitry Andric #else 29e8d8bef9SDimitry Andric #define __DEVICE__ static __device__ inline __attribute__((always_inline)) 3069ade1e0SDimitry Andric #endif 315ffd83dbSDimitry Andric 32*5f757f3fSDimitry Andric // Device library provides fast low precision and slow full-recision 33*5f757f3fSDimitry Andric // implementations for some functions. Which one gets selected depends on 34*5f757f3fSDimitry Andric // __CLANG_GPU_APPROX_TRANSCENDENTALS__ which gets defined by clang if 35*5f757f3fSDimitry Andric // -ffast-math or -fgpu-approx-transcendentals are in effect. 36*5f757f3fSDimitry Andric #pragma push_macro("__FAST_OR_SLOW") 37*5f757f3fSDimitry Andric #if defined(__CLANG_GPU_APPROX_TRANSCENDENTALS__) 38*5f757f3fSDimitry Andric #define __FAST_OR_SLOW(fast, slow) fast 39*5f757f3fSDimitry Andric #else 40*5f757f3fSDimitry Andric #define __FAST_OR_SLOW(fast, slow) slow 41*5f757f3fSDimitry Andric #endif 42*5f757f3fSDimitry Andric 43e8d8bef9SDimitry Andric // A few functions return bool type starting only in C++11. 44e8d8bef9SDimitry Andric #pragma push_macro("__RETURN_TYPE") 4569ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 4669ade1e0SDimitry Andric #define __RETURN_TYPE int 4769ade1e0SDimitry Andric #else 48e8d8bef9SDimitry Andric #if defined(__cplusplus) 495ffd83dbSDimitry Andric #define __RETURN_TYPE bool 50e8d8bef9SDimitry Andric #else 51e8d8bef9SDimitry Andric #define __RETURN_TYPE int 52e8d8bef9SDimitry Andric #endif 5369ade1e0SDimitry Andric #endif // __OPENMP_AMDGCN__ 54e8d8bef9SDimitry Andric 55e8d8bef9SDimitry Andric #if defined (__cplusplus) && __cplusplus < 201103L 56e8d8bef9SDimitry Andric // emulate static_assert on type sizes 57e8d8bef9SDimitry Andric template<bool> 58e8d8bef9SDimitry Andric struct __compare_result{}; 59e8d8bef9SDimitry Andric template<> 60e8d8bef9SDimitry Andric struct __compare_result<true> { 61fe6060f1SDimitry Andric static const __device__ bool valid; 62e8d8bef9SDimitry Andric }; 635ffd83dbSDimitry Andric 645ffd83dbSDimitry Andric __DEVICE__ 65e8d8bef9SDimitry Andric void __suppress_unused_warning(bool b){}; 66e8d8bef9SDimitry Andric template <unsigned int S, unsigned int T> 67e8d8bef9SDimitry Andric __DEVICE__ void __static_assert_equal_size() { 68e8d8bef9SDimitry Andric __suppress_unused_warning(__compare_result<S == T>::valid); 69e8d8bef9SDimitry Andric } 70e8d8bef9SDimitry Andric 71e8d8bef9SDimitry Andric #define __static_assert_type_size_equal(A, B) \ 72e8d8bef9SDimitry Andric __static_assert_equal_size<A,B>() 73e8d8bef9SDimitry Andric 74e8d8bef9SDimitry Andric #else 75e8d8bef9SDimitry Andric #define __static_assert_type_size_equal(A,B) \ 76e8d8bef9SDimitry Andric static_assert((A) == (B), "") 77e8d8bef9SDimitry Andric 78e8d8bef9SDimitry Andric #endif 79e8d8bef9SDimitry Andric 80e8d8bef9SDimitry Andric __DEVICE__ 81bdd1243dSDimitry Andric uint64_t __make_mantissa_base8(const char *__tagp __attribute__((nonnull))) { 825ffd83dbSDimitry Andric uint64_t __r = 0; 83bdd1243dSDimitry Andric while (*__tagp != '\0') { 845ffd83dbSDimitry Andric char __tmp = *__tagp; 855ffd83dbSDimitry Andric 865ffd83dbSDimitry Andric if (__tmp >= '0' && __tmp <= '7') 875ffd83dbSDimitry Andric __r = (__r * 8u) + __tmp - '0'; 885ffd83dbSDimitry Andric else 895ffd83dbSDimitry Andric return 0; 905ffd83dbSDimitry Andric 915ffd83dbSDimitry Andric ++__tagp; 925ffd83dbSDimitry Andric } 935ffd83dbSDimitry Andric 945ffd83dbSDimitry Andric return __r; 955ffd83dbSDimitry Andric } 965ffd83dbSDimitry Andric 975ffd83dbSDimitry Andric __DEVICE__ 98bdd1243dSDimitry Andric uint64_t __make_mantissa_base10(const char *__tagp __attribute__((nonnull))) { 995ffd83dbSDimitry Andric uint64_t __r = 0; 100bdd1243dSDimitry Andric while (*__tagp != '\0') { 1015ffd83dbSDimitry Andric char __tmp = *__tagp; 1025ffd83dbSDimitry Andric 1035ffd83dbSDimitry Andric if (__tmp >= '0' && __tmp <= '9') 1045ffd83dbSDimitry Andric __r = (__r * 10u) + __tmp - '0'; 1055ffd83dbSDimitry Andric else 1065ffd83dbSDimitry Andric return 0; 1075ffd83dbSDimitry Andric 1085ffd83dbSDimitry Andric ++__tagp; 1095ffd83dbSDimitry Andric } 1105ffd83dbSDimitry Andric 1115ffd83dbSDimitry Andric return __r; 1125ffd83dbSDimitry Andric } 1135ffd83dbSDimitry Andric 1145ffd83dbSDimitry Andric __DEVICE__ 115bdd1243dSDimitry Andric uint64_t __make_mantissa_base16(const char *__tagp __attribute__((nonnull))) { 1165ffd83dbSDimitry Andric uint64_t __r = 0; 117bdd1243dSDimitry Andric while (*__tagp != '\0') { 1185ffd83dbSDimitry Andric char __tmp = *__tagp; 1195ffd83dbSDimitry Andric 1205ffd83dbSDimitry Andric if (__tmp >= '0' && __tmp <= '9') 1215ffd83dbSDimitry Andric __r = (__r * 16u) + __tmp - '0'; 1225ffd83dbSDimitry Andric else if (__tmp >= 'a' && __tmp <= 'f') 1235ffd83dbSDimitry Andric __r = (__r * 16u) + __tmp - 'a' + 10; 1245ffd83dbSDimitry Andric else if (__tmp >= 'A' && __tmp <= 'F') 1255ffd83dbSDimitry Andric __r = (__r * 16u) + __tmp - 'A' + 10; 1265ffd83dbSDimitry Andric else 1275ffd83dbSDimitry Andric return 0; 1285ffd83dbSDimitry Andric 1295ffd83dbSDimitry Andric ++__tagp; 1305ffd83dbSDimitry Andric } 1315ffd83dbSDimitry Andric 1325ffd83dbSDimitry Andric return __r; 1335ffd83dbSDimitry Andric } 1345ffd83dbSDimitry Andric 1355ffd83dbSDimitry Andric __DEVICE__ 136bdd1243dSDimitry Andric uint64_t __make_mantissa(const char *__tagp __attribute__((nonnull))) { 1375ffd83dbSDimitry Andric if (*__tagp == '0') { 1385ffd83dbSDimitry Andric ++__tagp; 1395ffd83dbSDimitry Andric 1405ffd83dbSDimitry Andric if (*__tagp == 'x' || *__tagp == 'X') 1415ffd83dbSDimitry Andric return __make_mantissa_base16(__tagp); 1425ffd83dbSDimitry Andric else 1435ffd83dbSDimitry Andric return __make_mantissa_base8(__tagp); 1445ffd83dbSDimitry Andric } 1455ffd83dbSDimitry Andric 1465ffd83dbSDimitry Andric return __make_mantissa_base10(__tagp); 1475ffd83dbSDimitry Andric } 1485ffd83dbSDimitry Andric 1495ffd83dbSDimitry Andric // BEGIN FLOAT 150*5f757f3fSDimitry Andric 151*5f757f3fSDimitry Andric // BEGIN INTRINSICS 152*5f757f3fSDimitry Andric 153*5f757f3fSDimitry Andric __DEVICE__ 154*5f757f3fSDimitry Andric float __cosf(float __x) { return __ocml_native_cos_f32(__x); } 155*5f757f3fSDimitry Andric 156*5f757f3fSDimitry Andric __DEVICE__ 157*5f757f3fSDimitry Andric float __exp10f(float __x) { 158*5f757f3fSDimitry Andric const float __log2_10 = 0x1.a934f0p+1f; 159*5f757f3fSDimitry Andric return __builtin_amdgcn_exp2f(__log2_10 * __x); 160*5f757f3fSDimitry Andric } 161*5f757f3fSDimitry Andric 162*5f757f3fSDimitry Andric __DEVICE__ 163*5f757f3fSDimitry Andric float __expf(float __x) { 164*5f757f3fSDimitry Andric const float __log2_e = 0x1.715476p+0; 165*5f757f3fSDimitry Andric return __builtin_amdgcn_exp2f(__log2_e * __x); 166*5f757f3fSDimitry Andric } 167*5f757f3fSDimitry Andric 168*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 169*5f757f3fSDimitry Andric __DEVICE__ 170*5f757f3fSDimitry Andric float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } 171*5f757f3fSDimitry Andric __DEVICE__ 172*5f757f3fSDimitry Andric float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } 173*5f757f3fSDimitry Andric __DEVICE__ 174*5f757f3fSDimitry Andric float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } 175*5f757f3fSDimitry Andric __DEVICE__ 176*5f757f3fSDimitry Andric float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } 177*5f757f3fSDimitry Andric #else 178*5f757f3fSDimitry Andric __DEVICE__ 179*5f757f3fSDimitry Andric float __fadd_rn(float __x, float __y) { return __x + __y; } 180*5f757f3fSDimitry Andric #endif 181*5f757f3fSDimitry Andric 182*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 183*5f757f3fSDimitry Andric __DEVICE__ 184*5f757f3fSDimitry Andric float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } 185*5f757f3fSDimitry Andric __DEVICE__ 186*5f757f3fSDimitry Andric float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } 187*5f757f3fSDimitry Andric __DEVICE__ 188*5f757f3fSDimitry Andric float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } 189*5f757f3fSDimitry Andric __DEVICE__ 190*5f757f3fSDimitry Andric float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } 191*5f757f3fSDimitry Andric #else 192*5f757f3fSDimitry Andric __DEVICE__ 193*5f757f3fSDimitry Andric float __fdiv_rn(float __x, float __y) { return __x / __y; } 194*5f757f3fSDimitry Andric #endif 195*5f757f3fSDimitry Andric 196*5f757f3fSDimitry Andric __DEVICE__ 197*5f757f3fSDimitry Andric float __fdividef(float __x, float __y) { return __x / __y; } 198*5f757f3fSDimitry Andric 199*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 200*5f757f3fSDimitry Andric __DEVICE__ 201*5f757f3fSDimitry Andric float __fmaf_rd(float __x, float __y, float __z) { 202*5f757f3fSDimitry Andric return __ocml_fma_rtn_f32(__x, __y, __z); 203*5f757f3fSDimitry Andric } 204*5f757f3fSDimitry Andric __DEVICE__ 205*5f757f3fSDimitry Andric float __fmaf_rn(float __x, float __y, float __z) { 206*5f757f3fSDimitry Andric return __ocml_fma_rte_f32(__x, __y, __z); 207*5f757f3fSDimitry Andric } 208*5f757f3fSDimitry Andric __DEVICE__ 209*5f757f3fSDimitry Andric float __fmaf_ru(float __x, float __y, float __z) { 210*5f757f3fSDimitry Andric return __ocml_fma_rtp_f32(__x, __y, __z); 211*5f757f3fSDimitry Andric } 212*5f757f3fSDimitry Andric __DEVICE__ 213*5f757f3fSDimitry Andric float __fmaf_rz(float __x, float __y, float __z) { 214*5f757f3fSDimitry Andric return __ocml_fma_rtz_f32(__x, __y, __z); 215*5f757f3fSDimitry Andric } 216*5f757f3fSDimitry Andric #else 217*5f757f3fSDimitry Andric __DEVICE__ 218*5f757f3fSDimitry Andric float __fmaf_rn(float __x, float __y, float __z) { 219*5f757f3fSDimitry Andric return __builtin_fmaf(__x, __y, __z); 220*5f757f3fSDimitry Andric } 221*5f757f3fSDimitry Andric #endif 222*5f757f3fSDimitry Andric 223*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 224*5f757f3fSDimitry Andric __DEVICE__ 225*5f757f3fSDimitry Andric float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } 226*5f757f3fSDimitry Andric __DEVICE__ 227*5f757f3fSDimitry Andric float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } 228*5f757f3fSDimitry Andric __DEVICE__ 229*5f757f3fSDimitry Andric float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } 230*5f757f3fSDimitry Andric __DEVICE__ 231*5f757f3fSDimitry Andric float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } 232*5f757f3fSDimitry Andric #else 233*5f757f3fSDimitry Andric __DEVICE__ 234*5f757f3fSDimitry Andric float __fmul_rn(float __x, float __y) { return __x * __y; } 235*5f757f3fSDimitry Andric #endif 236*5f757f3fSDimitry Andric 237*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 238*5f757f3fSDimitry Andric __DEVICE__ 239*5f757f3fSDimitry Andric float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } 240*5f757f3fSDimitry Andric __DEVICE__ 241*5f757f3fSDimitry Andric float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } 242*5f757f3fSDimitry Andric __DEVICE__ 243*5f757f3fSDimitry Andric float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } 244*5f757f3fSDimitry Andric __DEVICE__ 245*5f757f3fSDimitry Andric float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } 246*5f757f3fSDimitry Andric #else 247*5f757f3fSDimitry Andric __DEVICE__ 248*5f757f3fSDimitry Andric float __frcp_rn(float __x) { return 1.0f / __x; } 249*5f757f3fSDimitry Andric #endif 250*5f757f3fSDimitry Andric 251*5f757f3fSDimitry Andric __DEVICE__ 252*5f757f3fSDimitry Andric float __frsqrt_rn(float __x) { return __builtin_amdgcn_rsqf(__x); } 253*5f757f3fSDimitry Andric 254*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 255*5f757f3fSDimitry Andric __DEVICE__ 256*5f757f3fSDimitry Andric float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } 257*5f757f3fSDimitry Andric __DEVICE__ 258*5f757f3fSDimitry Andric float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } 259*5f757f3fSDimitry Andric __DEVICE__ 260*5f757f3fSDimitry Andric float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } 261*5f757f3fSDimitry Andric __DEVICE__ 262*5f757f3fSDimitry Andric float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } 263*5f757f3fSDimitry Andric #else 264*5f757f3fSDimitry Andric __DEVICE__ 265*5f757f3fSDimitry Andric float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } 266*5f757f3fSDimitry Andric #endif 267*5f757f3fSDimitry Andric 268*5f757f3fSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 269*5f757f3fSDimitry Andric __DEVICE__ 270*5f757f3fSDimitry Andric float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } 271*5f757f3fSDimitry Andric __DEVICE__ 272*5f757f3fSDimitry Andric float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } 273*5f757f3fSDimitry Andric __DEVICE__ 274*5f757f3fSDimitry Andric float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } 275*5f757f3fSDimitry Andric __DEVICE__ 276*5f757f3fSDimitry Andric float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } 277*5f757f3fSDimitry Andric #else 278*5f757f3fSDimitry Andric __DEVICE__ 279*5f757f3fSDimitry Andric float __fsub_rn(float __x, float __y) { return __x - __y; } 280*5f757f3fSDimitry Andric #endif 281*5f757f3fSDimitry Andric 282*5f757f3fSDimitry Andric __DEVICE__ 283*5f757f3fSDimitry Andric float __log10f(float __x) { return __builtin_log10f(__x); } 284*5f757f3fSDimitry Andric 285*5f757f3fSDimitry Andric __DEVICE__ 286*5f757f3fSDimitry Andric float __log2f(float __x) { return __builtin_amdgcn_logf(__x); } 287*5f757f3fSDimitry Andric 288*5f757f3fSDimitry Andric __DEVICE__ 289*5f757f3fSDimitry Andric float __logf(float __x) { return __builtin_logf(__x); } 290*5f757f3fSDimitry Andric 291*5f757f3fSDimitry Andric __DEVICE__ 292*5f757f3fSDimitry Andric float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 293*5f757f3fSDimitry Andric 294*5f757f3fSDimitry Andric __DEVICE__ 295*5f757f3fSDimitry Andric float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } 296*5f757f3fSDimitry Andric 297*5f757f3fSDimitry Andric __DEVICE__ 298*5f757f3fSDimitry Andric void __sincosf(float __x, float *__sinptr, float *__cosptr) { 299*5f757f3fSDimitry Andric *__sinptr = __ocml_native_sin_f32(__x); 300*5f757f3fSDimitry Andric *__cosptr = __ocml_native_cos_f32(__x); 301*5f757f3fSDimitry Andric } 302*5f757f3fSDimitry Andric 303*5f757f3fSDimitry Andric __DEVICE__ 304*5f757f3fSDimitry Andric float __sinf(float __x) { return __ocml_native_sin_f32(__x); } 305*5f757f3fSDimitry Andric 306*5f757f3fSDimitry Andric __DEVICE__ 307*5f757f3fSDimitry Andric float __tanf(float __x) { 308*5f757f3fSDimitry Andric return __sinf(__x) * __builtin_amdgcn_rcpf(__cosf(__x)); 309*5f757f3fSDimitry Andric } 310*5f757f3fSDimitry Andric // END INTRINSICS 311*5f757f3fSDimitry Andric 312e8d8bef9SDimitry Andric #if defined(__cplusplus) 3135ffd83dbSDimitry Andric __DEVICE__ 314e8d8bef9SDimitry Andric int abs(int __x) { 315*5f757f3fSDimitry Andric return __builtin_abs(__x); 3165ffd83dbSDimitry Andric } 3175ffd83dbSDimitry Andric __DEVICE__ 318e8d8bef9SDimitry Andric long labs(long __x) { 319*5f757f3fSDimitry Andric return __builtin_labs(__x); 320e8d8bef9SDimitry Andric } 3215ffd83dbSDimitry Andric __DEVICE__ 322e8d8bef9SDimitry Andric long long llabs(long long __x) { 323*5f757f3fSDimitry Andric return __builtin_llabs(__x); 324e8d8bef9SDimitry Andric } 325e8d8bef9SDimitry Andric #endif 326e8d8bef9SDimitry Andric 3275ffd83dbSDimitry Andric __DEVICE__ 328e8d8bef9SDimitry Andric float acosf(float __x) { return __ocml_acos_f32(__x); } 329e8d8bef9SDimitry Andric 3305ffd83dbSDimitry Andric __DEVICE__ 331e8d8bef9SDimitry Andric float acoshf(float __x) { return __ocml_acosh_f32(__x); } 332e8d8bef9SDimitry Andric 3335ffd83dbSDimitry Andric __DEVICE__ 334e8d8bef9SDimitry Andric float asinf(float __x) { return __ocml_asin_f32(__x); } 335e8d8bef9SDimitry Andric 3365ffd83dbSDimitry Andric __DEVICE__ 337e8d8bef9SDimitry Andric float asinhf(float __x) { return __ocml_asinh_f32(__x); } 338e8d8bef9SDimitry Andric 3395ffd83dbSDimitry Andric __DEVICE__ 340e8d8bef9SDimitry Andric float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } 341e8d8bef9SDimitry Andric 3425ffd83dbSDimitry Andric __DEVICE__ 343e8d8bef9SDimitry Andric float atanf(float __x) { return __ocml_atan_f32(__x); } 344e8d8bef9SDimitry Andric 3455ffd83dbSDimitry Andric __DEVICE__ 346e8d8bef9SDimitry Andric float atanhf(float __x) { return __ocml_atanh_f32(__x); } 347e8d8bef9SDimitry Andric 3485ffd83dbSDimitry Andric __DEVICE__ 349e8d8bef9SDimitry Andric float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } 350e8d8bef9SDimitry Andric 3515ffd83dbSDimitry Andric __DEVICE__ 35206c3fb27SDimitry Andric float ceilf(float __x) { return __builtin_ceilf(__x); } 353e8d8bef9SDimitry Andric 3545ffd83dbSDimitry Andric __DEVICE__ 35506c3fb27SDimitry Andric float copysignf(float __x, float __y) { return __builtin_copysignf(__x, __y); } 356e8d8bef9SDimitry Andric 3575ffd83dbSDimitry Andric __DEVICE__ 358*5f757f3fSDimitry Andric float cosf(float __x) { return __FAST_OR_SLOW(__cosf, __ocml_cos_f32)(__x); } 359e8d8bef9SDimitry Andric 3605ffd83dbSDimitry Andric __DEVICE__ 361e8d8bef9SDimitry Andric float coshf(float __x) { return __ocml_cosh_f32(__x); } 362e8d8bef9SDimitry Andric 3635ffd83dbSDimitry Andric __DEVICE__ 364e8d8bef9SDimitry Andric float cospif(float __x) { return __ocml_cospi_f32(__x); } 365e8d8bef9SDimitry Andric 3665ffd83dbSDimitry Andric __DEVICE__ 367e8d8bef9SDimitry Andric float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } 368e8d8bef9SDimitry Andric 3695ffd83dbSDimitry Andric __DEVICE__ 370e8d8bef9SDimitry Andric float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } 371e8d8bef9SDimitry Andric 3725ffd83dbSDimitry Andric __DEVICE__ 373e8d8bef9SDimitry Andric float erfcf(float __x) { return __ocml_erfc_f32(__x); } 374e8d8bef9SDimitry Andric 3755ffd83dbSDimitry Andric __DEVICE__ 376e8d8bef9SDimitry Andric float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } 377e8d8bef9SDimitry Andric 378e8d8bef9SDimitry Andric __DEVICE__ 379e8d8bef9SDimitry Andric float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } 380e8d8bef9SDimitry Andric 381e8d8bef9SDimitry Andric __DEVICE__ 382e8d8bef9SDimitry Andric float erff(float __x) { return __ocml_erf_f32(__x); } 383e8d8bef9SDimitry Andric 384e8d8bef9SDimitry Andric __DEVICE__ 385e8d8bef9SDimitry Andric float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } 386e8d8bef9SDimitry Andric 387e8d8bef9SDimitry Andric __DEVICE__ 388e8d8bef9SDimitry Andric float exp10f(float __x) { return __ocml_exp10_f32(__x); } 389e8d8bef9SDimitry Andric 390e8d8bef9SDimitry Andric __DEVICE__ 39106c3fb27SDimitry Andric float exp2f(float __x) { return __builtin_exp2f(__x); } 392e8d8bef9SDimitry Andric 393e8d8bef9SDimitry Andric __DEVICE__ 39406c3fb27SDimitry Andric float expf(float __x) { return __builtin_expf(__x); } 395e8d8bef9SDimitry Andric 396e8d8bef9SDimitry Andric __DEVICE__ 397e8d8bef9SDimitry Andric float expm1f(float __x) { return __ocml_expm1_f32(__x); } 398e8d8bef9SDimitry Andric 399e8d8bef9SDimitry Andric __DEVICE__ 400bdd1243dSDimitry Andric float fabsf(float __x) { return __builtin_fabsf(__x); } 401e8d8bef9SDimitry Andric 402e8d8bef9SDimitry Andric __DEVICE__ 403e8d8bef9SDimitry Andric float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } 404e8d8bef9SDimitry Andric 405e8d8bef9SDimitry Andric __DEVICE__ 406e8d8bef9SDimitry Andric float fdividef(float __x, float __y) { return __x / __y; } 407e8d8bef9SDimitry Andric 408e8d8bef9SDimitry Andric __DEVICE__ 40906c3fb27SDimitry Andric float floorf(float __x) { return __builtin_floorf(__x); } 410e8d8bef9SDimitry Andric 411e8d8bef9SDimitry Andric __DEVICE__ 412e8d8bef9SDimitry Andric float fmaf(float __x, float __y, float __z) { 41306c3fb27SDimitry Andric return __builtin_fmaf(__x, __y, __z); 4145ffd83dbSDimitry Andric } 415e8d8bef9SDimitry Andric 4165ffd83dbSDimitry Andric __DEVICE__ 41706c3fb27SDimitry Andric float fmaxf(float __x, float __y) { return __builtin_fmaxf(__x, __y); } 418e8d8bef9SDimitry Andric 4195ffd83dbSDimitry Andric __DEVICE__ 42006c3fb27SDimitry Andric float fminf(float __x, float __y) { return __builtin_fminf(__x, __y); } 421e8d8bef9SDimitry Andric 4225ffd83dbSDimitry Andric __DEVICE__ 423e8d8bef9SDimitry Andric float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } 424e8d8bef9SDimitry Andric 4255ffd83dbSDimitry Andric __DEVICE__ 426e8d8bef9SDimitry Andric float frexpf(float __x, int *__nptr) { 42706c3fb27SDimitry Andric return __builtin_frexpf(__x, __nptr); 4285ffd83dbSDimitry Andric } 429e8d8bef9SDimitry Andric 4305ffd83dbSDimitry Andric __DEVICE__ 431e8d8bef9SDimitry Andric float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } 432e8d8bef9SDimitry Andric 4335ffd83dbSDimitry Andric __DEVICE__ 434e8d8bef9SDimitry Andric int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } 435e8d8bef9SDimitry Andric 4365ffd83dbSDimitry Andric __DEVICE__ 43706c3fb27SDimitry Andric __RETURN_TYPE __finitef(float __x) { return __builtin_isfinite(__x); } 438e8d8bef9SDimitry Andric 4395ffd83dbSDimitry Andric __DEVICE__ 44006c3fb27SDimitry Andric __RETURN_TYPE __isinff(float __x) { return __builtin_isinf(__x); } 441e8d8bef9SDimitry Andric 4425ffd83dbSDimitry Andric __DEVICE__ 44306c3fb27SDimitry Andric __RETURN_TYPE __isnanf(float __x) { return __builtin_isnan(__x); } 444e8d8bef9SDimitry Andric 4455ffd83dbSDimitry Andric __DEVICE__ 446e8d8bef9SDimitry Andric float j0f(float __x) { return __ocml_j0_f32(__x); } 447e8d8bef9SDimitry Andric 4485ffd83dbSDimitry Andric __DEVICE__ 449e8d8bef9SDimitry Andric float j1f(float __x) { return __ocml_j1_f32(__x); } 450e8d8bef9SDimitry Andric 4515ffd83dbSDimitry Andric __DEVICE__ 452e8d8bef9SDimitry Andric float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication 4535ffd83dbSDimitry Andric // and the Miller & Brown algorithm 4545ffd83dbSDimitry Andric // for linear recurrences to get O(log n) steps, but it's unclear if 4555ffd83dbSDimitry Andric // it'd be beneficial in this case. 4565ffd83dbSDimitry Andric if (__n == 0) 4575ffd83dbSDimitry Andric return j0f(__x); 4585ffd83dbSDimitry Andric if (__n == 1) 4595ffd83dbSDimitry Andric return j1f(__x); 4605ffd83dbSDimitry Andric 4615ffd83dbSDimitry Andric float __x0 = j0f(__x); 4625ffd83dbSDimitry Andric float __x1 = j1f(__x); 4635ffd83dbSDimitry Andric for (int __i = 1; __i < __n; ++__i) { 4645ffd83dbSDimitry Andric float __x2 = (2 * __i) / __x * __x1 - __x0; 4655ffd83dbSDimitry Andric __x0 = __x1; 4665ffd83dbSDimitry Andric __x1 = __x2; 4675ffd83dbSDimitry Andric } 4685ffd83dbSDimitry Andric 4695ffd83dbSDimitry Andric return __x1; 4705ffd83dbSDimitry Andric } 471e8d8bef9SDimitry Andric 4725ffd83dbSDimitry Andric __DEVICE__ 47306c3fb27SDimitry Andric float ldexpf(float __x, int __e) { return __builtin_amdgcn_ldexpf(__x, __e); } 474e8d8bef9SDimitry Andric 4755ffd83dbSDimitry Andric __DEVICE__ 476e8d8bef9SDimitry Andric float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } 477e8d8bef9SDimitry Andric 4785ffd83dbSDimitry Andric __DEVICE__ 47906c3fb27SDimitry Andric long long int llrintf(float __x) { return __builtin_rintf(__x); } 480e8d8bef9SDimitry Andric 4815ffd83dbSDimitry Andric __DEVICE__ 48206c3fb27SDimitry Andric long long int llroundf(float __x) { return __builtin_roundf(__x); } 483e8d8bef9SDimitry Andric 4845ffd83dbSDimitry Andric __DEVICE__ 48506c3fb27SDimitry Andric float log10f(float __x) { return __builtin_log10f(__x); } 486e8d8bef9SDimitry Andric 4875ffd83dbSDimitry Andric __DEVICE__ 488e8d8bef9SDimitry Andric float log1pf(float __x) { return __ocml_log1p_f32(__x); } 489e8d8bef9SDimitry Andric 4905ffd83dbSDimitry Andric __DEVICE__ 491*5f757f3fSDimitry Andric float log2f(float __x) { return __FAST_OR_SLOW(__log2f, __ocml_log2_f32)(__x); } 492e8d8bef9SDimitry Andric 4935ffd83dbSDimitry Andric __DEVICE__ 494e8d8bef9SDimitry Andric float logbf(float __x) { return __ocml_logb_f32(__x); } 495e8d8bef9SDimitry Andric 4965ffd83dbSDimitry Andric __DEVICE__ 497*5f757f3fSDimitry Andric float logf(float __x) { return __FAST_OR_SLOW(__logf, __ocml_log_f32)(__x); } 498e8d8bef9SDimitry Andric 4995ffd83dbSDimitry Andric __DEVICE__ 50006c3fb27SDimitry Andric long int lrintf(float __x) { return __builtin_rintf(__x); } 501e8d8bef9SDimitry Andric 5025ffd83dbSDimitry Andric __DEVICE__ 50306c3fb27SDimitry Andric long int lroundf(float __x) { return __builtin_roundf(__x); } 504e8d8bef9SDimitry Andric 5055ffd83dbSDimitry Andric __DEVICE__ 506e8d8bef9SDimitry Andric float modff(float __x, float *__iptr) { 5075ffd83dbSDimitry Andric float __tmp; 50869ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 50969ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 51069ade1e0SDimitry Andric #endif 5115ffd83dbSDimitry Andric float __r = 5125ffd83dbSDimitry Andric __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 5135ffd83dbSDimitry Andric *__iptr = __tmp; 5145ffd83dbSDimitry Andric return __r; 5155ffd83dbSDimitry Andric } 516e8d8bef9SDimitry Andric 5175ffd83dbSDimitry Andric __DEVICE__ 518bdd1243dSDimitry Andric float nanf(const char *__tagp __attribute__((nonnull))) { 5195ffd83dbSDimitry Andric union { 5205ffd83dbSDimitry Andric float val; 5215ffd83dbSDimitry Andric struct ieee_float { 522e8d8bef9SDimitry Andric unsigned int mantissa : 22; 523e8d8bef9SDimitry Andric unsigned int quiet : 1; 524e8d8bef9SDimitry Andric unsigned int exponent : 8; 525e8d8bef9SDimitry Andric unsigned int sign : 1; 5265ffd83dbSDimitry Andric } bits; 5275ffd83dbSDimitry Andric } __tmp; 528e8d8bef9SDimitry Andric __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); 5295ffd83dbSDimitry Andric 5305ffd83dbSDimitry Andric __tmp.bits.sign = 0u; 5315ffd83dbSDimitry Andric __tmp.bits.exponent = ~0u; 5325ffd83dbSDimitry Andric __tmp.bits.quiet = 1u; 5335ffd83dbSDimitry Andric __tmp.bits.mantissa = __make_mantissa(__tagp); 5345ffd83dbSDimitry Andric 5355ffd83dbSDimitry Andric return __tmp.val; 5365ffd83dbSDimitry Andric } 537e8d8bef9SDimitry Andric 5385ffd83dbSDimitry Andric __DEVICE__ 53906c3fb27SDimitry Andric float nearbyintf(float __x) { return __builtin_nearbyintf(__x); } 540e8d8bef9SDimitry Andric 5415ffd83dbSDimitry Andric __DEVICE__ 542e8d8bef9SDimitry Andric float nextafterf(float __x, float __y) { 5435ffd83dbSDimitry Andric return __ocml_nextafter_f32(__x, __y); 5445ffd83dbSDimitry Andric } 545e8d8bef9SDimitry Andric 5465ffd83dbSDimitry Andric __DEVICE__ 547e8d8bef9SDimitry Andric float norm3df(float __x, float __y, float __z) { 5485ffd83dbSDimitry Andric return __ocml_len3_f32(__x, __y, __z); 5495ffd83dbSDimitry Andric } 550e8d8bef9SDimitry Andric 5515ffd83dbSDimitry Andric __DEVICE__ 552e8d8bef9SDimitry Andric float norm4df(float __x, float __y, float __z, float __w) { 5535ffd83dbSDimitry Andric return __ocml_len4_f32(__x, __y, __z, __w); 5545ffd83dbSDimitry Andric } 555e8d8bef9SDimitry Andric 5565ffd83dbSDimitry Andric __DEVICE__ 557e8d8bef9SDimitry Andric float normcdff(float __x) { return __ocml_ncdf_f32(__x); } 558e8d8bef9SDimitry Andric 5595ffd83dbSDimitry Andric __DEVICE__ 560e8d8bef9SDimitry Andric float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } 561e8d8bef9SDimitry Andric 5625ffd83dbSDimitry Andric __DEVICE__ 563e8d8bef9SDimitry Andric float normf(int __dim, 5645ffd83dbSDimitry Andric const float *__a) { // TODO: placeholder until OCML adds support. 5655ffd83dbSDimitry Andric float __r = 0; 5665ffd83dbSDimitry Andric while (__dim--) { 5675ffd83dbSDimitry Andric __r += __a[0] * __a[0]; 5685ffd83dbSDimitry Andric ++__a; 5695ffd83dbSDimitry Andric } 5705ffd83dbSDimitry Andric 571*5f757f3fSDimitry Andric return __builtin_sqrtf(__r); 5725ffd83dbSDimitry Andric } 573e8d8bef9SDimitry Andric 5745ffd83dbSDimitry Andric __DEVICE__ 575e8d8bef9SDimitry Andric float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 576e8d8bef9SDimitry Andric 5775ffd83dbSDimitry Andric __DEVICE__ 578e8d8bef9SDimitry Andric float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); } 579e8d8bef9SDimitry Andric 5805ffd83dbSDimitry Andric __DEVICE__ 581e8d8bef9SDimitry Andric float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } 582e8d8bef9SDimitry Andric 583e8d8bef9SDimitry Andric __DEVICE__ 584e8d8bef9SDimitry Andric float remainderf(float __x, float __y) { 5855ffd83dbSDimitry Andric return __ocml_remainder_f32(__x, __y); 5865ffd83dbSDimitry Andric } 587e8d8bef9SDimitry Andric 5885ffd83dbSDimitry Andric __DEVICE__ 589e8d8bef9SDimitry Andric float remquof(float __x, float __y, int *__quo) { 5905ffd83dbSDimitry Andric int __tmp; 59169ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 59269ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 59369ade1e0SDimitry Andric #endif 5945ffd83dbSDimitry Andric float __r = __ocml_remquo_f32( 5955ffd83dbSDimitry Andric __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 5965ffd83dbSDimitry Andric *__quo = __tmp; 5975ffd83dbSDimitry Andric 5985ffd83dbSDimitry Andric return __r; 5995ffd83dbSDimitry Andric } 600e8d8bef9SDimitry Andric 6015ffd83dbSDimitry Andric __DEVICE__ 602e8d8bef9SDimitry Andric float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); } 603e8d8bef9SDimitry Andric 6045ffd83dbSDimitry Andric __DEVICE__ 60506c3fb27SDimitry Andric float rintf(float __x) { return __builtin_rintf(__x); } 606e8d8bef9SDimitry Andric 6075ffd83dbSDimitry Andric __DEVICE__ 608e8d8bef9SDimitry Andric float rnorm3df(float __x, float __y, float __z) { 6095ffd83dbSDimitry Andric return __ocml_rlen3_f32(__x, __y, __z); 6105ffd83dbSDimitry Andric } 6115ffd83dbSDimitry Andric 6125ffd83dbSDimitry Andric __DEVICE__ 613e8d8bef9SDimitry Andric float rnorm4df(float __x, float __y, float __z, float __w) { 6145ffd83dbSDimitry Andric return __ocml_rlen4_f32(__x, __y, __z, __w); 6155ffd83dbSDimitry Andric } 616e8d8bef9SDimitry Andric 6175ffd83dbSDimitry Andric __DEVICE__ 618e8d8bef9SDimitry Andric float rnormf(int __dim, 6195ffd83dbSDimitry Andric const float *__a) { // TODO: placeholder until OCML adds support. 6205ffd83dbSDimitry Andric float __r = 0; 6215ffd83dbSDimitry Andric while (__dim--) { 6225ffd83dbSDimitry Andric __r += __a[0] * __a[0]; 6235ffd83dbSDimitry Andric ++__a; 6245ffd83dbSDimitry Andric } 6255ffd83dbSDimitry Andric 6265ffd83dbSDimitry Andric return __ocml_rsqrt_f32(__r); 6275ffd83dbSDimitry Andric } 628e8d8bef9SDimitry Andric 6295ffd83dbSDimitry Andric __DEVICE__ 63006c3fb27SDimitry Andric float roundf(float __x) { return __builtin_roundf(__x); } 631e8d8bef9SDimitry Andric 6325ffd83dbSDimitry Andric __DEVICE__ 633e8d8bef9SDimitry Andric float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } 634e8d8bef9SDimitry Andric 6355ffd83dbSDimitry Andric __DEVICE__ 636e8d8bef9SDimitry Andric float scalblnf(float __x, long int __n) { 63706c3fb27SDimitry Andric return (__n < INT_MAX) ? __builtin_amdgcn_ldexpf(__x, __n) 6385ffd83dbSDimitry Andric : __ocml_scalb_f32(__x, __n); 6395ffd83dbSDimitry Andric } 6405ffd83dbSDimitry Andric 641e8d8bef9SDimitry Andric __DEVICE__ 64206c3fb27SDimitry Andric float scalbnf(float __x, int __n) { return __builtin_amdgcn_ldexpf(__x, __n); } 643e8d8bef9SDimitry Andric 644e8d8bef9SDimitry Andric __DEVICE__ 64506c3fb27SDimitry Andric __RETURN_TYPE __signbitf(float __x) { return __builtin_signbitf(__x); } 646e8d8bef9SDimitry Andric 647e8d8bef9SDimitry Andric __DEVICE__ 648e8d8bef9SDimitry Andric void sincosf(float __x, float *__sinptr, float *__cosptr) { 649e8d8bef9SDimitry Andric float __tmp; 65069ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 65169ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 65269ade1e0SDimitry Andric #endif 653*5f757f3fSDimitry Andric #ifdef __CLANG_CUDA_APPROX_TRANSCENDENTALS__ 654*5f757f3fSDimitry Andric __sincosf(__x, __sinptr, __cosptr); 655*5f757f3fSDimitry Andric #else 6565ffd83dbSDimitry Andric *__sinptr = 6575ffd83dbSDimitry Andric __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 6585ffd83dbSDimitry Andric *__cosptr = __tmp; 659*5f757f3fSDimitry Andric #endif 6605ffd83dbSDimitry Andric } 6615ffd83dbSDimitry Andric 662e8d8bef9SDimitry Andric __DEVICE__ 663e8d8bef9SDimitry Andric void sincospif(float __x, float *__sinptr, float *__cosptr) { 664e8d8bef9SDimitry Andric float __tmp; 66569ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 66669ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 66769ade1e0SDimitry Andric #endif 6685ffd83dbSDimitry Andric *__sinptr = __ocml_sincospi_f32( 6695ffd83dbSDimitry Andric __x, (__attribute__((address_space(5))) float *)&__tmp); 6705ffd83dbSDimitry Andric *__cosptr = __tmp; 6715ffd83dbSDimitry Andric } 672e8d8bef9SDimitry Andric 6735ffd83dbSDimitry Andric __DEVICE__ 674*5f757f3fSDimitry Andric float sinf(float __x) { return __FAST_OR_SLOW(__sinf, __ocml_sin_f32)(__x); } 675e8d8bef9SDimitry Andric 6765ffd83dbSDimitry Andric __DEVICE__ 677e8d8bef9SDimitry Andric float sinhf(float __x) { return __ocml_sinh_f32(__x); } 678e8d8bef9SDimitry Andric 6795ffd83dbSDimitry Andric __DEVICE__ 680e8d8bef9SDimitry Andric float sinpif(float __x) { return __ocml_sinpi_f32(__x); } 681e8d8bef9SDimitry Andric 6825ffd83dbSDimitry Andric __DEVICE__ 683*5f757f3fSDimitry Andric float sqrtf(float __x) { return __builtin_sqrtf(__x); } 684e8d8bef9SDimitry Andric 6855ffd83dbSDimitry Andric __DEVICE__ 686e8d8bef9SDimitry Andric float tanf(float __x) { return __ocml_tan_f32(__x); } 687e8d8bef9SDimitry Andric 6885ffd83dbSDimitry Andric __DEVICE__ 689e8d8bef9SDimitry Andric float tanhf(float __x) { return __ocml_tanh_f32(__x); } 690e8d8bef9SDimitry Andric 6915ffd83dbSDimitry Andric __DEVICE__ 692e8d8bef9SDimitry Andric float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } 693e8d8bef9SDimitry Andric 6945ffd83dbSDimitry Andric __DEVICE__ 69506c3fb27SDimitry Andric float truncf(float __x) { return __builtin_truncf(__x); } 696e8d8bef9SDimitry Andric 6975ffd83dbSDimitry Andric __DEVICE__ 698e8d8bef9SDimitry Andric float y0f(float __x) { return __ocml_y0_f32(__x); } 699e8d8bef9SDimitry Andric 7005ffd83dbSDimitry Andric __DEVICE__ 701e8d8bef9SDimitry Andric float y1f(float __x) { return __ocml_y1_f32(__x); } 702e8d8bef9SDimitry Andric 7035ffd83dbSDimitry Andric __DEVICE__ 704e8d8bef9SDimitry Andric float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication 7055ffd83dbSDimitry Andric // and the Miller & Brown algorithm 7065ffd83dbSDimitry Andric // for linear recurrences to get O(log n) steps, but it's unclear if 7075ffd83dbSDimitry Andric // it'd be beneficial in this case. Placeholder until OCML adds 7085ffd83dbSDimitry Andric // support. 7095ffd83dbSDimitry Andric if (__n == 0) 7105ffd83dbSDimitry Andric return y0f(__x); 7115ffd83dbSDimitry Andric if (__n == 1) 7125ffd83dbSDimitry Andric return y1f(__x); 7135ffd83dbSDimitry Andric 7145ffd83dbSDimitry Andric float __x0 = y0f(__x); 7155ffd83dbSDimitry Andric float __x1 = y1f(__x); 7165ffd83dbSDimitry Andric for (int __i = 1; __i < __n; ++__i) { 7175ffd83dbSDimitry Andric float __x2 = (2 * __i) / __x * __x1 - __x0; 7185ffd83dbSDimitry Andric __x0 = __x1; 7195ffd83dbSDimitry Andric __x1 = __x2; 7205ffd83dbSDimitry Andric } 7215ffd83dbSDimitry Andric 7225ffd83dbSDimitry Andric return __x1; 7235ffd83dbSDimitry Andric } 7245ffd83dbSDimitry Andric 725e8d8bef9SDimitry Andric 7265ffd83dbSDimitry Andric // END FLOAT 7275ffd83dbSDimitry Andric 7285ffd83dbSDimitry Andric // BEGIN DOUBLE 7295ffd83dbSDimitry Andric __DEVICE__ 730e8d8bef9SDimitry Andric double acos(double __x) { return __ocml_acos_f64(__x); } 731e8d8bef9SDimitry Andric 7325ffd83dbSDimitry Andric __DEVICE__ 733e8d8bef9SDimitry Andric double acosh(double __x) { return __ocml_acosh_f64(__x); } 734e8d8bef9SDimitry Andric 7355ffd83dbSDimitry Andric __DEVICE__ 736e8d8bef9SDimitry Andric double asin(double __x) { return __ocml_asin_f64(__x); } 737e8d8bef9SDimitry Andric 7385ffd83dbSDimitry Andric __DEVICE__ 739e8d8bef9SDimitry Andric double asinh(double __x) { return __ocml_asinh_f64(__x); } 740e8d8bef9SDimitry Andric 7415ffd83dbSDimitry Andric __DEVICE__ 742e8d8bef9SDimitry Andric double atan(double __x) { return __ocml_atan_f64(__x); } 743e8d8bef9SDimitry Andric 7445ffd83dbSDimitry Andric __DEVICE__ 745e8d8bef9SDimitry Andric double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); } 746e8d8bef9SDimitry Andric 7475ffd83dbSDimitry Andric __DEVICE__ 748e8d8bef9SDimitry Andric double atanh(double __x) { return __ocml_atanh_f64(__x); } 749e8d8bef9SDimitry Andric 7505ffd83dbSDimitry Andric __DEVICE__ 751e8d8bef9SDimitry Andric double cbrt(double __x) { return __ocml_cbrt_f64(__x); } 752e8d8bef9SDimitry Andric 7535ffd83dbSDimitry Andric __DEVICE__ 75406c3fb27SDimitry Andric double ceil(double __x) { return __builtin_ceil(__x); } 755e8d8bef9SDimitry Andric 7565ffd83dbSDimitry Andric __DEVICE__ 757e8d8bef9SDimitry Andric double copysign(double __x, double __y) { 75806c3fb27SDimitry Andric return __builtin_copysign(__x, __y); 7595ffd83dbSDimitry Andric } 760e8d8bef9SDimitry Andric 7615ffd83dbSDimitry Andric __DEVICE__ 762e8d8bef9SDimitry Andric double cos(double __x) { return __ocml_cos_f64(__x); } 763e8d8bef9SDimitry Andric 7645ffd83dbSDimitry Andric __DEVICE__ 765e8d8bef9SDimitry Andric double cosh(double __x) { return __ocml_cosh_f64(__x); } 766e8d8bef9SDimitry Andric 7675ffd83dbSDimitry Andric __DEVICE__ 768e8d8bef9SDimitry Andric double cospi(double __x) { return __ocml_cospi_f64(__x); } 769e8d8bef9SDimitry Andric 7705ffd83dbSDimitry Andric __DEVICE__ 771e8d8bef9SDimitry Andric double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } 772e8d8bef9SDimitry Andric 7735ffd83dbSDimitry Andric __DEVICE__ 774e8d8bef9SDimitry Andric double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } 775e8d8bef9SDimitry Andric 7765ffd83dbSDimitry Andric __DEVICE__ 777e8d8bef9SDimitry Andric double erf(double __x) { return __ocml_erf_f64(__x); } 778e8d8bef9SDimitry Andric 7795ffd83dbSDimitry Andric __DEVICE__ 780e8d8bef9SDimitry Andric double erfc(double __x) { return __ocml_erfc_f64(__x); } 781e8d8bef9SDimitry Andric 7825ffd83dbSDimitry Andric __DEVICE__ 783e8d8bef9SDimitry Andric double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } 784e8d8bef9SDimitry Andric 7855ffd83dbSDimitry Andric __DEVICE__ 786e8d8bef9SDimitry Andric double erfcx(double __x) { return __ocml_erfcx_f64(__x); } 787e8d8bef9SDimitry Andric 7885ffd83dbSDimitry Andric __DEVICE__ 789e8d8bef9SDimitry Andric double erfinv(double __x) { return __ocml_erfinv_f64(__x); } 790e8d8bef9SDimitry Andric 7915ffd83dbSDimitry Andric __DEVICE__ 792e8d8bef9SDimitry Andric double exp(double __x) { return __ocml_exp_f64(__x); } 793e8d8bef9SDimitry Andric 7945ffd83dbSDimitry Andric __DEVICE__ 795e8d8bef9SDimitry Andric double exp10(double __x) { return __ocml_exp10_f64(__x); } 796e8d8bef9SDimitry Andric 7975ffd83dbSDimitry Andric __DEVICE__ 798e8d8bef9SDimitry Andric double exp2(double __x) { return __ocml_exp2_f64(__x); } 799e8d8bef9SDimitry Andric 8005ffd83dbSDimitry Andric __DEVICE__ 801e8d8bef9SDimitry Andric double expm1(double __x) { return __ocml_expm1_f64(__x); } 802e8d8bef9SDimitry Andric 8035ffd83dbSDimitry Andric __DEVICE__ 804bdd1243dSDimitry Andric double fabs(double __x) { return __builtin_fabs(__x); } 805e8d8bef9SDimitry Andric 8065ffd83dbSDimitry Andric __DEVICE__ 807e8d8bef9SDimitry Andric double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } 808e8d8bef9SDimitry Andric 8095ffd83dbSDimitry Andric __DEVICE__ 81006c3fb27SDimitry Andric double floor(double __x) { return __builtin_floor(__x); } 811e8d8bef9SDimitry Andric 8125ffd83dbSDimitry Andric __DEVICE__ 813e8d8bef9SDimitry Andric double fma(double __x, double __y, double __z) { 81406c3fb27SDimitry Andric return __builtin_fma(__x, __y, __z); 8155ffd83dbSDimitry Andric } 816e8d8bef9SDimitry Andric 8175ffd83dbSDimitry Andric __DEVICE__ 81806c3fb27SDimitry Andric double fmax(double __x, double __y) { return __builtin_fmax(__x, __y); } 819e8d8bef9SDimitry Andric 8205ffd83dbSDimitry Andric __DEVICE__ 82106c3fb27SDimitry Andric double fmin(double __x, double __y) { return __builtin_fmin(__x, __y); } 822e8d8bef9SDimitry Andric 8235ffd83dbSDimitry Andric __DEVICE__ 824e8d8bef9SDimitry Andric double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } 825e8d8bef9SDimitry Andric 8265ffd83dbSDimitry Andric __DEVICE__ 827e8d8bef9SDimitry Andric double frexp(double __x, int *__nptr) { 82806c3fb27SDimitry Andric return __builtin_frexp(__x, __nptr); 8295ffd83dbSDimitry Andric } 830e8d8bef9SDimitry Andric 8315ffd83dbSDimitry Andric __DEVICE__ 832e8d8bef9SDimitry Andric double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); } 833e8d8bef9SDimitry Andric 8345ffd83dbSDimitry Andric __DEVICE__ 835e8d8bef9SDimitry Andric int ilogb(double __x) { return __ocml_ilogb_f64(__x); } 836e8d8bef9SDimitry Andric 8375ffd83dbSDimitry Andric __DEVICE__ 83806c3fb27SDimitry Andric __RETURN_TYPE __finite(double __x) { return __builtin_isfinite(__x); } 839e8d8bef9SDimitry Andric 8405ffd83dbSDimitry Andric __DEVICE__ 84106c3fb27SDimitry Andric __RETURN_TYPE __isinf(double __x) { return __builtin_isinf(__x); } 842e8d8bef9SDimitry Andric 8435ffd83dbSDimitry Andric __DEVICE__ 84406c3fb27SDimitry Andric __RETURN_TYPE __isnan(double __x) { return __builtin_isnan(__x); } 845e8d8bef9SDimitry Andric 8465ffd83dbSDimitry Andric __DEVICE__ 847e8d8bef9SDimitry Andric double j0(double __x) { return __ocml_j0_f64(__x); } 848e8d8bef9SDimitry Andric 8495ffd83dbSDimitry Andric __DEVICE__ 850e8d8bef9SDimitry Andric double j1(double __x) { return __ocml_j1_f64(__x); } 851e8d8bef9SDimitry Andric 8525ffd83dbSDimitry Andric __DEVICE__ 853e8d8bef9SDimitry Andric double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication 8545ffd83dbSDimitry Andric // and the Miller & Brown algorithm 8555ffd83dbSDimitry Andric // for linear recurrences to get O(log n) steps, but it's unclear if 8565ffd83dbSDimitry Andric // it'd be beneficial in this case. Placeholder until OCML adds 8575ffd83dbSDimitry Andric // support. 8585ffd83dbSDimitry Andric if (__n == 0) 859e8d8bef9SDimitry Andric return j0(__x); 8605ffd83dbSDimitry Andric if (__n == 1) 861e8d8bef9SDimitry Andric return j1(__x); 8625ffd83dbSDimitry Andric 863e8d8bef9SDimitry Andric double __x0 = j0(__x); 864e8d8bef9SDimitry Andric double __x1 = j1(__x); 8655ffd83dbSDimitry Andric for (int __i = 1; __i < __n; ++__i) { 8665ffd83dbSDimitry Andric double __x2 = (2 * __i) / __x * __x1 - __x0; 8675ffd83dbSDimitry Andric __x0 = __x1; 8685ffd83dbSDimitry Andric __x1 = __x2; 8695ffd83dbSDimitry Andric } 8705ffd83dbSDimitry Andric return __x1; 8715ffd83dbSDimitry Andric } 872e8d8bef9SDimitry Andric 8735ffd83dbSDimitry Andric __DEVICE__ 87406c3fb27SDimitry Andric double ldexp(double __x, int __e) { return __builtin_amdgcn_ldexp(__x, __e); } 875e8d8bef9SDimitry Andric 8765ffd83dbSDimitry Andric __DEVICE__ 877e8d8bef9SDimitry Andric double lgamma(double __x) { return __ocml_lgamma_f64(__x); } 878e8d8bef9SDimitry Andric 8795ffd83dbSDimitry Andric __DEVICE__ 88006c3fb27SDimitry Andric long long int llrint(double __x) { return __builtin_rint(__x); } 881e8d8bef9SDimitry Andric 8825ffd83dbSDimitry Andric __DEVICE__ 88306c3fb27SDimitry Andric long long int llround(double __x) { return __builtin_round(__x); } 884e8d8bef9SDimitry Andric 8855ffd83dbSDimitry Andric __DEVICE__ 886e8d8bef9SDimitry Andric double log(double __x) { return __ocml_log_f64(__x); } 887e8d8bef9SDimitry Andric 8885ffd83dbSDimitry Andric __DEVICE__ 889e8d8bef9SDimitry Andric double log10(double __x) { return __ocml_log10_f64(__x); } 890e8d8bef9SDimitry Andric 8915ffd83dbSDimitry Andric __DEVICE__ 892e8d8bef9SDimitry Andric double log1p(double __x) { return __ocml_log1p_f64(__x); } 893e8d8bef9SDimitry Andric 8945ffd83dbSDimitry Andric __DEVICE__ 895e8d8bef9SDimitry Andric double log2(double __x) { return __ocml_log2_f64(__x); } 896e8d8bef9SDimitry Andric 8975ffd83dbSDimitry Andric __DEVICE__ 898e8d8bef9SDimitry Andric double logb(double __x) { return __ocml_logb_f64(__x); } 899e8d8bef9SDimitry Andric 9005ffd83dbSDimitry Andric __DEVICE__ 90106c3fb27SDimitry Andric long int lrint(double __x) { return __builtin_rint(__x); } 902e8d8bef9SDimitry Andric 9035ffd83dbSDimitry Andric __DEVICE__ 90406c3fb27SDimitry Andric long int lround(double __x) { return __builtin_round(__x); } 905e8d8bef9SDimitry Andric 9065ffd83dbSDimitry Andric __DEVICE__ 907e8d8bef9SDimitry Andric double modf(double __x, double *__iptr) { 9085ffd83dbSDimitry Andric double __tmp; 90969ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 91069ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 91169ade1e0SDimitry Andric #endif 9125ffd83dbSDimitry Andric double __r = 9135ffd83dbSDimitry Andric __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); 9145ffd83dbSDimitry Andric *__iptr = __tmp; 9155ffd83dbSDimitry Andric 9165ffd83dbSDimitry Andric return __r; 9175ffd83dbSDimitry Andric } 918e8d8bef9SDimitry Andric 9195ffd83dbSDimitry Andric __DEVICE__ 920e8d8bef9SDimitry Andric double nan(const char *__tagp) { 9215ffd83dbSDimitry Andric #if !_WIN32 9225ffd83dbSDimitry Andric union { 9235ffd83dbSDimitry Andric double val; 9245ffd83dbSDimitry Andric struct ieee_double { 9255ffd83dbSDimitry Andric uint64_t mantissa : 51; 9265ffd83dbSDimitry Andric uint32_t quiet : 1; 9275ffd83dbSDimitry Andric uint32_t exponent : 11; 9285ffd83dbSDimitry Andric uint32_t sign : 1; 9295ffd83dbSDimitry Andric } bits; 9305ffd83dbSDimitry Andric } __tmp; 931e8d8bef9SDimitry Andric __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); 9325ffd83dbSDimitry Andric 9335ffd83dbSDimitry Andric __tmp.bits.sign = 0u; 9345ffd83dbSDimitry Andric __tmp.bits.exponent = ~0u; 9355ffd83dbSDimitry Andric __tmp.bits.quiet = 1u; 9365ffd83dbSDimitry Andric __tmp.bits.mantissa = __make_mantissa(__tagp); 9375ffd83dbSDimitry Andric 9385ffd83dbSDimitry Andric return __tmp.val; 9395ffd83dbSDimitry Andric #else 940e8d8bef9SDimitry Andric __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double)); 941e8d8bef9SDimitry Andric uint64_t __val = __make_mantissa(__tagp); 942e8d8bef9SDimitry Andric __val |= 0xFFF << 51; 943e8d8bef9SDimitry Andric return *reinterpret_cast<double *>(&__val); 9445ffd83dbSDimitry Andric #endif 9455ffd83dbSDimitry Andric } 946e8d8bef9SDimitry Andric 9475ffd83dbSDimitry Andric __DEVICE__ 94806c3fb27SDimitry Andric double nearbyint(double __x) { return __builtin_nearbyint(__x); } 949e8d8bef9SDimitry Andric 9505ffd83dbSDimitry Andric __DEVICE__ 951e8d8bef9SDimitry Andric double nextafter(double __x, double __y) { 9525ffd83dbSDimitry Andric return __ocml_nextafter_f64(__x, __y); 9535ffd83dbSDimitry Andric } 954e8d8bef9SDimitry Andric 9555ffd83dbSDimitry Andric __DEVICE__ 956e8d8bef9SDimitry Andric double norm(int __dim, 9575ffd83dbSDimitry Andric const double *__a) { // TODO: placeholder until OCML adds support. 9585ffd83dbSDimitry Andric double __r = 0; 9595ffd83dbSDimitry Andric while (__dim--) { 9605ffd83dbSDimitry Andric __r += __a[0] * __a[0]; 9615ffd83dbSDimitry Andric ++__a; 9625ffd83dbSDimitry Andric } 9635ffd83dbSDimitry Andric 964*5f757f3fSDimitry Andric return __builtin_sqrt(__r); 9655ffd83dbSDimitry Andric } 966e8d8bef9SDimitry Andric 9675ffd83dbSDimitry Andric __DEVICE__ 968e8d8bef9SDimitry Andric double norm3d(double __x, double __y, double __z) { 9695ffd83dbSDimitry Andric return __ocml_len3_f64(__x, __y, __z); 9705ffd83dbSDimitry Andric } 971e8d8bef9SDimitry Andric 9725ffd83dbSDimitry Andric __DEVICE__ 973e8d8bef9SDimitry Andric double norm4d(double __x, double __y, double __z, double __w) { 9745ffd83dbSDimitry Andric return __ocml_len4_f64(__x, __y, __z, __w); 9755ffd83dbSDimitry Andric } 976e8d8bef9SDimitry Andric 9775ffd83dbSDimitry Andric __DEVICE__ 978e8d8bef9SDimitry Andric double normcdf(double __x) { return __ocml_ncdf_f64(__x); } 979e8d8bef9SDimitry Andric 9805ffd83dbSDimitry Andric __DEVICE__ 981e8d8bef9SDimitry Andric double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } 982e8d8bef9SDimitry Andric 9835ffd83dbSDimitry Andric __DEVICE__ 984e8d8bef9SDimitry Andric double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } 985e8d8bef9SDimitry Andric 9865ffd83dbSDimitry Andric __DEVICE__ 987e8d8bef9SDimitry Andric double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); } 988e8d8bef9SDimitry Andric 9895ffd83dbSDimitry Andric __DEVICE__ 990e8d8bef9SDimitry Andric double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } 991e8d8bef9SDimitry Andric 992e8d8bef9SDimitry Andric __DEVICE__ 993e8d8bef9SDimitry Andric double remainder(double __x, double __y) { 9945ffd83dbSDimitry Andric return __ocml_remainder_f64(__x, __y); 9955ffd83dbSDimitry Andric } 996e8d8bef9SDimitry Andric 9975ffd83dbSDimitry Andric __DEVICE__ 998e8d8bef9SDimitry Andric double remquo(double __x, double __y, int *__quo) { 9995ffd83dbSDimitry Andric int __tmp; 100069ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 100169ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 100269ade1e0SDimitry Andric #endif 10035ffd83dbSDimitry Andric double __r = __ocml_remquo_f64( 10045ffd83dbSDimitry Andric __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 10055ffd83dbSDimitry Andric *__quo = __tmp; 10065ffd83dbSDimitry Andric 10075ffd83dbSDimitry Andric return __r; 10085ffd83dbSDimitry Andric } 1009e8d8bef9SDimitry Andric 10105ffd83dbSDimitry Andric __DEVICE__ 1011e8d8bef9SDimitry Andric double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); } 1012e8d8bef9SDimitry Andric 10135ffd83dbSDimitry Andric __DEVICE__ 101406c3fb27SDimitry Andric double rint(double __x) { return __builtin_rint(__x); } 1015e8d8bef9SDimitry Andric 10165ffd83dbSDimitry Andric __DEVICE__ 1017e8d8bef9SDimitry Andric double rnorm(int __dim, 10185ffd83dbSDimitry Andric const double *__a) { // TODO: placeholder until OCML adds support. 10195ffd83dbSDimitry Andric double __r = 0; 10205ffd83dbSDimitry Andric while (__dim--) { 10215ffd83dbSDimitry Andric __r += __a[0] * __a[0]; 10225ffd83dbSDimitry Andric ++__a; 10235ffd83dbSDimitry Andric } 10245ffd83dbSDimitry Andric 10255ffd83dbSDimitry Andric return __ocml_rsqrt_f64(__r); 10265ffd83dbSDimitry Andric } 1027e8d8bef9SDimitry Andric 10285ffd83dbSDimitry Andric __DEVICE__ 1029e8d8bef9SDimitry Andric double rnorm3d(double __x, double __y, double __z) { 10305ffd83dbSDimitry Andric return __ocml_rlen3_f64(__x, __y, __z); 10315ffd83dbSDimitry Andric } 1032e8d8bef9SDimitry Andric 10335ffd83dbSDimitry Andric __DEVICE__ 1034e8d8bef9SDimitry Andric double rnorm4d(double __x, double __y, double __z, double __w) { 10355ffd83dbSDimitry Andric return __ocml_rlen4_f64(__x, __y, __z, __w); 10365ffd83dbSDimitry Andric } 1037e8d8bef9SDimitry Andric 10385ffd83dbSDimitry Andric __DEVICE__ 103906c3fb27SDimitry Andric double round(double __x) { return __builtin_round(__x); } 1040e8d8bef9SDimitry Andric 10415ffd83dbSDimitry Andric __DEVICE__ 1042e8d8bef9SDimitry Andric double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } 1043e8d8bef9SDimitry Andric 10445ffd83dbSDimitry Andric __DEVICE__ 1045e8d8bef9SDimitry Andric double scalbln(double __x, long int __n) { 104606c3fb27SDimitry Andric return (__n < INT_MAX) ? __builtin_amdgcn_ldexp(__x, __n) 10475ffd83dbSDimitry Andric : __ocml_scalb_f64(__x, __n); 10485ffd83dbSDimitry Andric } 10495ffd83dbSDimitry Andric __DEVICE__ 105006c3fb27SDimitry Andric double scalbn(double __x, int __n) { return __builtin_amdgcn_ldexp(__x, __n); } 1051e8d8bef9SDimitry Andric 10525ffd83dbSDimitry Andric __DEVICE__ 105306c3fb27SDimitry Andric __RETURN_TYPE __signbit(double __x) { return __builtin_signbit(__x); } 1054e8d8bef9SDimitry Andric 10555ffd83dbSDimitry Andric __DEVICE__ 1056e8d8bef9SDimitry Andric double sin(double __x) { return __ocml_sin_f64(__x); } 1057e8d8bef9SDimitry Andric 10585ffd83dbSDimitry Andric __DEVICE__ 1059e8d8bef9SDimitry Andric void sincos(double __x, double *__sinptr, double *__cosptr) { 10605ffd83dbSDimitry Andric double __tmp; 106169ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 106269ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 106369ade1e0SDimitry Andric #endif 10645ffd83dbSDimitry Andric *__sinptr = __ocml_sincos_f64( 10655ffd83dbSDimitry Andric __x, (__attribute__((address_space(5))) double *)&__tmp); 10665ffd83dbSDimitry Andric *__cosptr = __tmp; 10675ffd83dbSDimitry Andric } 1068e8d8bef9SDimitry Andric 10695ffd83dbSDimitry Andric __DEVICE__ 1070e8d8bef9SDimitry Andric void sincospi(double __x, double *__sinptr, double *__cosptr) { 10715ffd83dbSDimitry Andric double __tmp; 107269ade1e0SDimitry Andric #ifdef __OPENMP_AMDGCN__ 107369ade1e0SDimitry Andric #pragma omp allocate(__tmp) allocator(omp_thread_mem_alloc) 107469ade1e0SDimitry Andric #endif 10755ffd83dbSDimitry Andric *__sinptr = __ocml_sincospi_f64( 10765ffd83dbSDimitry Andric __x, (__attribute__((address_space(5))) double *)&__tmp); 10775ffd83dbSDimitry Andric *__cosptr = __tmp; 10785ffd83dbSDimitry Andric } 1079e8d8bef9SDimitry Andric 10805ffd83dbSDimitry Andric __DEVICE__ 1081e8d8bef9SDimitry Andric double sinh(double __x) { return __ocml_sinh_f64(__x); } 1082e8d8bef9SDimitry Andric 10835ffd83dbSDimitry Andric __DEVICE__ 1084e8d8bef9SDimitry Andric double sinpi(double __x) { return __ocml_sinpi_f64(__x); } 1085e8d8bef9SDimitry Andric 10865ffd83dbSDimitry Andric __DEVICE__ 1087*5f757f3fSDimitry Andric double sqrt(double __x) { return __builtin_sqrt(__x); } 1088e8d8bef9SDimitry Andric 10895ffd83dbSDimitry Andric __DEVICE__ 1090e8d8bef9SDimitry Andric double tan(double __x) { return __ocml_tan_f64(__x); } 1091e8d8bef9SDimitry Andric 10925ffd83dbSDimitry Andric __DEVICE__ 1093e8d8bef9SDimitry Andric double tanh(double __x) { return __ocml_tanh_f64(__x); } 1094e8d8bef9SDimitry Andric 10955ffd83dbSDimitry Andric __DEVICE__ 1096e8d8bef9SDimitry Andric double tgamma(double __x) { return __ocml_tgamma_f64(__x); } 1097e8d8bef9SDimitry Andric 10985ffd83dbSDimitry Andric __DEVICE__ 109906c3fb27SDimitry Andric double trunc(double __x) { return __builtin_trunc(__x); } 1100e8d8bef9SDimitry Andric 11015ffd83dbSDimitry Andric __DEVICE__ 1102e8d8bef9SDimitry Andric double y0(double __x) { return __ocml_y0_f64(__x); } 1103e8d8bef9SDimitry Andric 11045ffd83dbSDimitry Andric __DEVICE__ 1105e8d8bef9SDimitry Andric double y1(double __x) { return __ocml_y1_f64(__x); } 1106e8d8bef9SDimitry Andric 11075ffd83dbSDimitry Andric __DEVICE__ 1108e8d8bef9SDimitry Andric double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication 11095ffd83dbSDimitry Andric // and the Miller & Brown algorithm 11105ffd83dbSDimitry Andric // for linear recurrences to get O(log n) steps, but it's unclear if 11115ffd83dbSDimitry Andric // it'd be beneficial in this case. Placeholder until OCML adds 11125ffd83dbSDimitry Andric // support. 11135ffd83dbSDimitry Andric if (__n == 0) 1114e8d8bef9SDimitry Andric return y0(__x); 11155ffd83dbSDimitry Andric if (__n == 1) 1116e8d8bef9SDimitry Andric return y1(__x); 11175ffd83dbSDimitry Andric 1118e8d8bef9SDimitry Andric double __x0 = y0(__x); 1119e8d8bef9SDimitry Andric double __x1 = y1(__x); 11205ffd83dbSDimitry Andric for (int __i = 1; __i < __n; ++__i) { 11215ffd83dbSDimitry Andric double __x2 = (2 * __i) / __x * __x1 - __x0; 11225ffd83dbSDimitry Andric __x0 = __x1; 11235ffd83dbSDimitry Andric __x1 = __x2; 11245ffd83dbSDimitry Andric } 11255ffd83dbSDimitry Andric 11265ffd83dbSDimitry Andric return __x1; 11275ffd83dbSDimitry Andric } 11285ffd83dbSDimitry Andric 11295ffd83dbSDimitry Andric // BEGIN INTRINSICS 11305ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 11315ffd83dbSDimitry Andric __DEVICE__ 1132e8d8bef9SDimitry Andric double __dadd_rd(double __x, double __y) { 11335ffd83dbSDimitry Andric return __ocml_add_rtn_f64(__x, __y); 11345ffd83dbSDimitry Andric } 11355ffd83dbSDimitry Andric __DEVICE__ 1136e8d8bef9SDimitry Andric double __dadd_rn(double __x, double __y) { 1137e8d8bef9SDimitry Andric return __ocml_add_rte_f64(__x, __y); 1138e8d8bef9SDimitry Andric } 11395ffd83dbSDimitry Andric __DEVICE__ 1140e8d8bef9SDimitry Andric double __dadd_ru(double __x, double __y) { 11415ffd83dbSDimitry Andric return __ocml_add_rtp_f64(__x, __y); 11425ffd83dbSDimitry Andric } 11435ffd83dbSDimitry Andric __DEVICE__ 1144e8d8bef9SDimitry Andric double __dadd_rz(double __x, double __y) { 11455ffd83dbSDimitry Andric return __ocml_add_rtz_f64(__x, __y); 11465ffd83dbSDimitry Andric } 1147e8d8bef9SDimitry Andric #else 11485ffd83dbSDimitry Andric __DEVICE__ 1149e8d8bef9SDimitry Andric double __dadd_rn(double __x, double __y) { return __x + __y; } 11505ffd83dbSDimitry Andric #endif 1151e8d8bef9SDimitry Andric 11525ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 11535ffd83dbSDimitry Andric __DEVICE__ 1154e8d8bef9SDimitry Andric double __ddiv_rd(double __x, double __y) { 1155e8d8bef9SDimitry Andric return __ocml_div_rtn_f64(__x, __y); 1156e8d8bef9SDimitry Andric } 1157e8d8bef9SDimitry Andric __DEVICE__ 1158e8d8bef9SDimitry Andric double __ddiv_rn(double __x, double __y) { 1159e8d8bef9SDimitry Andric return __ocml_div_rte_f64(__x, __y); 1160e8d8bef9SDimitry Andric } 1161e8d8bef9SDimitry Andric __DEVICE__ 1162e8d8bef9SDimitry Andric double __ddiv_ru(double __x, double __y) { 11635ffd83dbSDimitry Andric return __ocml_div_rtp_f64(__x, __y); 11645ffd83dbSDimitry Andric } 11655ffd83dbSDimitry Andric __DEVICE__ 1166e8d8bef9SDimitry Andric double __ddiv_rz(double __x, double __y) { 11675ffd83dbSDimitry Andric return __ocml_div_rtz_f64(__x, __y); 11685ffd83dbSDimitry Andric } 1169e8d8bef9SDimitry Andric #else 11705ffd83dbSDimitry Andric __DEVICE__ 1171e8d8bef9SDimitry Andric double __ddiv_rn(double __x, double __y) { return __x / __y; } 11725ffd83dbSDimitry Andric #endif 1173e8d8bef9SDimitry Andric 11745ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 11755ffd83dbSDimitry Andric __DEVICE__ 1176e8d8bef9SDimitry Andric double __dmul_rd(double __x, double __y) { 1177e8d8bef9SDimitry Andric return __ocml_mul_rtn_f64(__x, __y); 1178e8d8bef9SDimitry Andric } 1179e8d8bef9SDimitry Andric __DEVICE__ 1180e8d8bef9SDimitry Andric double __dmul_rn(double __x, double __y) { 1181e8d8bef9SDimitry Andric return __ocml_mul_rte_f64(__x, __y); 1182e8d8bef9SDimitry Andric } 1183e8d8bef9SDimitry Andric __DEVICE__ 1184e8d8bef9SDimitry Andric double __dmul_ru(double __x, double __y) { 11855ffd83dbSDimitry Andric return __ocml_mul_rtp_f64(__x, __y); 11865ffd83dbSDimitry Andric } 11875ffd83dbSDimitry Andric __DEVICE__ 1188e8d8bef9SDimitry Andric double __dmul_rz(double __x, double __y) { 11895ffd83dbSDimitry Andric return __ocml_mul_rtz_f64(__x, __y); 11905ffd83dbSDimitry Andric } 1191e8d8bef9SDimitry Andric #else 11925ffd83dbSDimitry Andric __DEVICE__ 1193e8d8bef9SDimitry Andric double __dmul_rn(double __x, double __y) { return __x * __y; } 11945ffd83dbSDimitry Andric #endif 1195e8d8bef9SDimitry Andric 11965ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 11975ffd83dbSDimitry Andric __DEVICE__ 1198e8d8bef9SDimitry Andric double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); } 11995ffd83dbSDimitry Andric __DEVICE__ 1200e8d8bef9SDimitry Andric double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); } 12015ffd83dbSDimitry Andric __DEVICE__ 1202e8d8bef9SDimitry Andric double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); } 1203e8d8bef9SDimitry Andric __DEVICE__ 1204e8d8bef9SDimitry Andric double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); } 1205e8d8bef9SDimitry Andric #else 1206e8d8bef9SDimitry Andric __DEVICE__ 1207e8d8bef9SDimitry Andric double __drcp_rn(double __x) { return 1.0 / __x; } 12085ffd83dbSDimitry Andric #endif 1209e8d8bef9SDimitry Andric 12105ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 12115ffd83dbSDimitry Andric __DEVICE__ 1212e8d8bef9SDimitry Andric double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } 12135ffd83dbSDimitry Andric __DEVICE__ 1214e8d8bef9SDimitry Andric double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); } 12155ffd83dbSDimitry Andric __DEVICE__ 1216e8d8bef9SDimitry Andric double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } 1217e8d8bef9SDimitry Andric __DEVICE__ 1218e8d8bef9SDimitry Andric double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } 1219e8d8bef9SDimitry Andric #else 1220e8d8bef9SDimitry Andric __DEVICE__ 1221*5f757f3fSDimitry Andric double __dsqrt_rn(double __x) { return __builtin_sqrt(__x); } 1222e8d8bef9SDimitry Andric #endif 1223e8d8bef9SDimitry Andric 1224e8d8bef9SDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 1225e8d8bef9SDimitry Andric __DEVICE__ 1226e8d8bef9SDimitry Andric double __dsub_rd(double __x, double __y) { 12275ffd83dbSDimitry Andric return __ocml_sub_rtn_f64(__x, __y); 12285ffd83dbSDimitry Andric } 12295ffd83dbSDimitry Andric __DEVICE__ 1230e8d8bef9SDimitry Andric double __dsub_rn(double __x, double __y) { 1231e8d8bef9SDimitry Andric return __ocml_sub_rte_f64(__x, __y); 1232e8d8bef9SDimitry Andric } 12335ffd83dbSDimitry Andric __DEVICE__ 1234e8d8bef9SDimitry Andric double __dsub_ru(double __x, double __y) { 12355ffd83dbSDimitry Andric return __ocml_sub_rtp_f64(__x, __y); 12365ffd83dbSDimitry Andric } 12375ffd83dbSDimitry Andric __DEVICE__ 1238e8d8bef9SDimitry Andric double __dsub_rz(double __x, double __y) { 12395ffd83dbSDimitry Andric return __ocml_sub_rtz_f64(__x, __y); 12405ffd83dbSDimitry Andric } 1241e8d8bef9SDimitry Andric #else 12425ffd83dbSDimitry Andric __DEVICE__ 1243e8d8bef9SDimitry Andric double __dsub_rn(double __x, double __y) { return __x - __y; } 12445ffd83dbSDimitry Andric #endif 1245e8d8bef9SDimitry Andric 12465ffd83dbSDimitry Andric #if defined OCML_BASIC_ROUNDED_OPERATIONS 12475ffd83dbSDimitry Andric __DEVICE__ 1248e8d8bef9SDimitry Andric double __fma_rd(double __x, double __y, double __z) { 1249e8d8bef9SDimitry Andric return __ocml_fma_rtn_f64(__x, __y, __z); 1250e8d8bef9SDimitry Andric } 1251e8d8bef9SDimitry Andric __DEVICE__ 1252e8d8bef9SDimitry Andric double __fma_rn(double __x, double __y, double __z) { 1253e8d8bef9SDimitry Andric return __ocml_fma_rte_f64(__x, __y, __z); 1254e8d8bef9SDimitry Andric } 1255e8d8bef9SDimitry Andric __DEVICE__ 1256e8d8bef9SDimitry Andric double __fma_ru(double __x, double __y, double __z) { 12575ffd83dbSDimitry Andric return __ocml_fma_rtp_f64(__x, __y, __z); 12585ffd83dbSDimitry Andric } 12595ffd83dbSDimitry Andric __DEVICE__ 1260e8d8bef9SDimitry Andric double __fma_rz(double __x, double __y, double __z) { 12615ffd83dbSDimitry Andric return __ocml_fma_rtz_f64(__x, __y, __z); 12625ffd83dbSDimitry Andric } 1263e8d8bef9SDimitry Andric #else 1264e8d8bef9SDimitry Andric __DEVICE__ 1265e8d8bef9SDimitry Andric double __fma_rn(double __x, double __y, double __z) { 126606c3fb27SDimitry Andric return __builtin_fma(__x, __y, __z); 1267e8d8bef9SDimitry Andric } 12685ffd83dbSDimitry Andric #endif 12695ffd83dbSDimitry Andric // END INTRINSICS 12705ffd83dbSDimitry Andric // END DOUBLE 12715ffd83dbSDimitry Andric 1272e8d8bef9SDimitry Andric // C only macros 1273e8d8bef9SDimitry Andric #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L 1274e8d8bef9SDimitry Andric #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x) 1275e8d8bef9SDimitry Andric #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x) 1276e8d8bef9SDimitry Andric #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x) 1277e8d8bef9SDimitry Andric #define signbit(__x) \ 1278e8d8bef9SDimitry Andric _Generic((__x), float : __signbitf, double : __signbit)(__x) 1279e8d8bef9SDimitry Andric #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L 12805ffd83dbSDimitry Andric 12815ffd83dbSDimitry Andric #if defined(__cplusplus) 1282e8d8bef9SDimitry Andric template <class T> __DEVICE__ T min(T __arg1, T __arg2) { 12835ffd83dbSDimitry Andric return (__arg1 < __arg2) ? __arg1 : __arg2; 12845ffd83dbSDimitry Andric } 12855ffd83dbSDimitry Andric 1286e8d8bef9SDimitry Andric template <class T> __DEVICE__ T max(T __arg1, T __arg2) { 12875ffd83dbSDimitry Andric return (__arg1 > __arg2) ? __arg1 : __arg2; 12885ffd83dbSDimitry Andric } 12895ffd83dbSDimitry Andric 1290e8d8bef9SDimitry Andric __DEVICE__ int min(int __arg1, int __arg2) { 12915ffd83dbSDimitry Andric return (__arg1 < __arg2) ? __arg1 : __arg2; 12925ffd83dbSDimitry Andric } 1293e8d8bef9SDimitry Andric __DEVICE__ int max(int __arg1, int __arg2) { 12945ffd83dbSDimitry Andric return (__arg1 > __arg2) ? __arg1 : __arg2; 12955ffd83dbSDimitry Andric } 12965ffd83dbSDimitry Andric 12975ffd83dbSDimitry Andric __DEVICE__ 129806c3fb27SDimitry Andric float max(float __x, float __y) { return __builtin_fmaxf(__x, __y); } 12995ffd83dbSDimitry Andric 13005ffd83dbSDimitry Andric __DEVICE__ 130106c3fb27SDimitry Andric double max(double __x, double __y) { return __builtin_fmax(__x, __y); } 13025ffd83dbSDimitry Andric 13035ffd83dbSDimitry Andric __DEVICE__ 130406c3fb27SDimitry Andric float min(float __x, float __y) { return __builtin_fminf(__x, __y); } 13055ffd83dbSDimitry Andric 13065ffd83dbSDimitry Andric __DEVICE__ 130706c3fb27SDimitry Andric double min(double __x, double __y) { return __builtin_fmin(__x, __y); } 13085ffd83dbSDimitry Andric 130969ade1e0SDimitry Andric #if !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) 13105ffd83dbSDimitry Andric __host__ inline static int min(int __arg1, int __arg2) { 1311*5f757f3fSDimitry Andric return __arg1 < __arg2 ? __arg1 : __arg2; 13125ffd83dbSDimitry Andric } 13135ffd83dbSDimitry Andric 13145ffd83dbSDimitry Andric __host__ inline static int max(int __arg1, int __arg2) { 1315*5f757f3fSDimitry Andric return __arg1 > __arg2 ? __arg1 : __arg2; 13165ffd83dbSDimitry Andric } 131769ade1e0SDimitry Andric #endif // !defined(__HIPCC_RTC__) && !defined(__OPENMP_AMDGCN__) 1318e8d8bef9SDimitry Andric #endif 13195ffd83dbSDimitry Andric 13205ffd83dbSDimitry Andric #pragma pop_macro("__DEVICE__") 13215ffd83dbSDimitry Andric #pragma pop_macro("__RETURN_TYPE") 1322*5f757f3fSDimitry Andric #pragma pop_macro("__FAST_OR_SLOW") 13235ffd83dbSDimitry Andric 13245ffd83dbSDimitry Andric #endif // __CLANG_HIP_MATH_H__ 1325