1 /*===---- __clang_hip_math.h - Device-side HIP math support ----------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 #ifndef __CLANG_HIP_MATH_H__ 10 #define __CLANG_HIP_MATH_H__ 11 12 #if !defined(__HIP__) 13 #error "This file is for HIP and OpenMP AMDGCN device compilation only." 14 #endif 15 16 #if defined(__cplusplus) 17 #include <algorithm> 18 #endif 19 #include <limits.h> 20 #include <stdint.h> 21 22 #pragma push_macro("__DEVICE__") 23 #define __DEVICE__ static __device__ inline __attribute__((always_inline)) 24 25 // A few functions return bool type starting only in C++11. 26 #pragma push_macro("__RETURN_TYPE") 27 #if defined(__cplusplus) 28 #define __RETURN_TYPE bool 29 #else 30 #define __RETURN_TYPE int 31 #endif 32 33 #if defined (__cplusplus) && __cplusplus < 201103L 34 // emulate static_assert on type sizes 35 template<bool> 36 struct __compare_result{}; 37 template<> 38 struct __compare_result<true> { 39 static const bool valid; 40 }; 41 42 __DEVICE__ 43 void __suppress_unused_warning(bool b){}; 44 template <unsigned int S, unsigned int T> 45 __DEVICE__ void __static_assert_equal_size() { 46 __suppress_unused_warning(__compare_result<S == T>::valid); 47 } 48 49 #define __static_assert_type_size_equal(A, B) \ 50 __static_assert_equal_size<A,B>() 51 52 #else 53 #define __static_assert_type_size_equal(A,B) \ 54 static_assert((A) == (B), "") 55 56 #endif 57 58 __DEVICE__ 59 uint64_t __make_mantissa_base8(const char *__tagp) { 60 uint64_t __r = 0; 61 while (__tagp) { 62 char __tmp = *__tagp; 63 64 if (__tmp >= '0' && __tmp <= '7') 65 __r = (__r * 8u) + __tmp - '0'; 66 else 67 return 0; 68 69 ++__tagp; 70 } 71 72 return __r; 73 } 74 75 __DEVICE__ 76 uint64_t __make_mantissa_base10(const char *__tagp) { 77 uint64_t __r = 0; 78 while (__tagp) { 79 char __tmp = *__tagp; 80 81 if (__tmp >= '0' && __tmp <= '9') 82 __r = (__r * 10u) + __tmp - '0'; 83 else 84 return 0; 85 86 ++__tagp; 87 } 88 89 return __r; 90 } 91 92 __DEVICE__ 93 uint64_t __make_mantissa_base16(const char *__tagp) { 94 uint64_t __r = 0; 95 while (__tagp) { 96 char __tmp = *__tagp; 97 98 if (__tmp >= '0' && __tmp <= '9') 99 __r = (__r * 16u) + __tmp - '0'; 100 else if (__tmp >= 'a' && __tmp <= 'f') 101 __r = (__r * 16u) + __tmp - 'a' + 10; 102 else if (__tmp >= 'A' && __tmp <= 'F') 103 __r = (__r * 16u) + __tmp - 'A' + 10; 104 else 105 return 0; 106 107 ++__tagp; 108 } 109 110 return __r; 111 } 112 113 __DEVICE__ 114 uint64_t __make_mantissa(const char *__tagp) { 115 if (!__tagp) 116 return 0u; 117 118 if (*__tagp == '0') { 119 ++__tagp; 120 121 if (*__tagp == 'x' || *__tagp == 'X') 122 return __make_mantissa_base16(__tagp); 123 else 124 return __make_mantissa_base8(__tagp); 125 } 126 127 return __make_mantissa_base10(__tagp); 128 } 129 130 // BEGIN FLOAT 131 #if defined(__cplusplus) 132 __DEVICE__ 133 int abs(int __x) { 134 int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); 135 return (__x ^ __sgn) - __sgn; 136 } 137 __DEVICE__ 138 long labs(long __x) { 139 long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); 140 return (__x ^ __sgn) - __sgn; 141 } 142 __DEVICE__ 143 long long llabs(long long __x) { 144 long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); 145 return (__x ^ __sgn) - __sgn; 146 } 147 #endif 148 149 __DEVICE__ 150 float acosf(float __x) { return __ocml_acos_f32(__x); } 151 152 __DEVICE__ 153 float acoshf(float __x) { return __ocml_acosh_f32(__x); } 154 155 __DEVICE__ 156 float asinf(float __x) { return __ocml_asin_f32(__x); } 157 158 __DEVICE__ 159 float asinhf(float __x) { return __ocml_asinh_f32(__x); } 160 161 __DEVICE__ 162 float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } 163 164 __DEVICE__ 165 float atanf(float __x) { return __ocml_atan_f32(__x); } 166 167 __DEVICE__ 168 float atanhf(float __x) { return __ocml_atanh_f32(__x); } 169 170 __DEVICE__ 171 float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } 172 173 __DEVICE__ 174 float ceilf(float __x) { return __ocml_ceil_f32(__x); } 175 176 __DEVICE__ 177 float copysignf(float __x, float __y) { return __ocml_copysign_f32(__x, __y); } 178 179 __DEVICE__ 180 float cosf(float __x) { return __ocml_cos_f32(__x); } 181 182 __DEVICE__ 183 float coshf(float __x) { return __ocml_cosh_f32(__x); } 184 185 __DEVICE__ 186 float cospif(float __x) { return __ocml_cospi_f32(__x); } 187 188 __DEVICE__ 189 float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } 190 191 __DEVICE__ 192 float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } 193 194 __DEVICE__ 195 float erfcf(float __x) { return __ocml_erfc_f32(__x); } 196 197 __DEVICE__ 198 float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } 199 200 __DEVICE__ 201 float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } 202 203 __DEVICE__ 204 float erff(float __x) { return __ocml_erf_f32(__x); } 205 206 __DEVICE__ 207 float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } 208 209 __DEVICE__ 210 float exp10f(float __x) { return __ocml_exp10_f32(__x); } 211 212 __DEVICE__ 213 float exp2f(float __x) { return __ocml_exp2_f32(__x); } 214 215 __DEVICE__ 216 float expf(float __x) { return __ocml_exp_f32(__x); } 217 218 __DEVICE__ 219 float expm1f(float __x) { return __ocml_expm1_f32(__x); } 220 221 __DEVICE__ 222 float fabsf(float __x) { return __ocml_fabs_f32(__x); } 223 224 __DEVICE__ 225 float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } 226 227 __DEVICE__ 228 float fdividef(float __x, float __y) { return __x / __y; } 229 230 __DEVICE__ 231 float floorf(float __x) { return __ocml_floor_f32(__x); } 232 233 __DEVICE__ 234 float fmaf(float __x, float __y, float __z) { 235 return __ocml_fma_f32(__x, __y, __z); 236 } 237 238 __DEVICE__ 239 float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } 240 241 __DEVICE__ 242 float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); } 243 244 __DEVICE__ 245 float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } 246 247 __DEVICE__ 248 float frexpf(float __x, int *__nptr) { 249 int __tmp; 250 float __r = 251 __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); 252 *__nptr = __tmp; 253 254 return __r; 255 } 256 257 __DEVICE__ 258 float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } 259 260 __DEVICE__ 261 int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } 262 263 __DEVICE__ 264 __RETURN_TYPE __finitef(float __x) { return __ocml_isfinite_f32(__x); } 265 266 __DEVICE__ 267 __RETURN_TYPE __isinff(float __x) { return __ocml_isinf_f32(__x); } 268 269 __DEVICE__ 270 __RETURN_TYPE __isnanf(float __x) { return __ocml_isnan_f32(__x); } 271 272 __DEVICE__ 273 float j0f(float __x) { return __ocml_j0_f32(__x); } 274 275 __DEVICE__ 276 float j1f(float __x) { return __ocml_j1_f32(__x); } 277 278 __DEVICE__ 279 float jnf(int __n, float __x) { // TODO: we could use Ahmes multiplication 280 // and the Miller & Brown algorithm 281 // for linear recurrences to get O(log n) steps, but it's unclear if 282 // it'd be beneficial in this case. 283 if (__n == 0) 284 return j0f(__x); 285 if (__n == 1) 286 return j1f(__x); 287 288 float __x0 = j0f(__x); 289 float __x1 = j1f(__x); 290 for (int __i = 1; __i < __n; ++__i) { 291 float __x2 = (2 * __i) / __x * __x1 - __x0; 292 __x0 = __x1; 293 __x1 = __x2; 294 } 295 296 return __x1; 297 } 298 299 __DEVICE__ 300 float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } 301 302 __DEVICE__ 303 float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } 304 305 __DEVICE__ 306 long long int llrintf(float __x) { return __ocml_rint_f32(__x); } 307 308 __DEVICE__ 309 long long int llroundf(float __x) { return __ocml_round_f32(__x); } 310 311 __DEVICE__ 312 float log10f(float __x) { return __ocml_log10_f32(__x); } 313 314 __DEVICE__ 315 float log1pf(float __x) { return __ocml_log1p_f32(__x); } 316 317 __DEVICE__ 318 float log2f(float __x) { return __ocml_log2_f32(__x); } 319 320 __DEVICE__ 321 float logbf(float __x) { return __ocml_logb_f32(__x); } 322 323 __DEVICE__ 324 float logf(float __x) { return __ocml_log_f32(__x); } 325 326 __DEVICE__ 327 long int lrintf(float __x) { return __ocml_rint_f32(__x); } 328 329 __DEVICE__ 330 long int lroundf(float __x) { return __ocml_round_f32(__x); } 331 332 __DEVICE__ 333 float modff(float __x, float *__iptr) { 334 float __tmp; 335 float __r = 336 __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 337 *__iptr = __tmp; 338 return __r; 339 } 340 341 __DEVICE__ 342 float nanf(const char *__tagp) { 343 union { 344 float val; 345 struct ieee_float { 346 unsigned int mantissa : 22; 347 unsigned int quiet : 1; 348 unsigned int exponent : 8; 349 unsigned int sign : 1; 350 } bits; 351 } __tmp; 352 __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); 353 354 __tmp.bits.sign = 0u; 355 __tmp.bits.exponent = ~0u; 356 __tmp.bits.quiet = 1u; 357 __tmp.bits.mantissa = __make_mantissa(__tagp); 358 359 return __tmp.val; 360 } 361 362 __DEVICE__ 363 float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } 364 365 __DEVICE__ 366 float nextafterf(float __x, float __y) { 367 return __ocml_nextafter_f32(__x, __y); 368 } 369 370 __DEVICE__ 371 float norm3df(float __x, float __y, float __z) { 372 return __ocml_len3_f32(__x, __y, __z); 373 } 374 375 __DEVICE__ 376 float norm4df(float __x, float __y, float __z, float __w) { 377 return __ocml_len4_f32(__x, __y, __z, __w); 378 } 379 380 __DEVICE__ 381 float normcdff(float __x) { return __ocml_ncdf_f32(__x); } 382 383 __DEVICE__ 384 float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } 385 386 __DEVICE__ 387 float normf(int __dim, 388 const float *__a) { // TODO: placeholder until OCML adds support. 389 float __r = 0; 390 while (__dim--) { 391 __r += __a[0] * __a[0]; 392 ++__a; 393 } 394 395 return __ocml_sqrt_f32(__r); 396 } 397 398 __DEVICE__ 399 float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 400 401 __DEVICE__ 402 float powif(float __x, int __y) { return __ocml_pown_f32(__x, __y); } 403 404 __DEVICE__ 405 float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } 406 407 __DEVICE__ 408 float remainderf(float __x, float __y) { 409 return __ocml_remainder_f32(__x, __y); 410 } 411 412 __DEVICE__ 413 float remquof(float __x, float __y, int *__quo) { 414 int __tmp; 415 float __r = __ocml_remquo_f32( 416 __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 417 *__quo = __tmp; 418 419 return __r; 420 } 421 422 __DEVICE__ 423 float rhypotf(float __x, float __y) { return __ocml_rhypot_f32(__x, __y); } 424 425 __DEVICE__ 426 float rintf(float __x) { return __ocml_rint_f32(__x); } 427 428 __DEVICE__ 429 float rnorm3df(float __x, float __y, float __z) { 430 return __ocml_rlen3_f32(__x, __y, __z); 431 } 432 433 __DEVICE__ 434 float rnorm4df(float __x, float __y, float __z, float __w) { 435 return __ocml_rlen4_f32(__x, __y, __z, __w); 436 } 437 438 __DEVICE__ 439 float rnormf(int __dim, 440 const float *__a) { // TODO: placeholder until OCML adds support. 441 float __r = 0; 442 while (__dim--) { 443 __r += __a[0] * __a[0]; 444 ++__a; 445 } 446 447 return __ocml_rsqrt_f32(__r); 448 } 449 450 __DEVICE__ 451 float roundf(float __x) { return __ocml_round_f32(__x); } 452 453 __DEVICE__ 454 float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } 455 456 __DEVICE__ 457 float scalblnf(float __x, long int __n) { 458 return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n) 459 : __ocml_scalb_f32(__x, __n); 460 } 461 462 __DEVICE__ 463 float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } 464 465 __DEVICE__ 466 __RETURN_TYPE __signbitf(float __x) { return __ocml_signbit_f32(__x); } 467 468 __DEVICE__ 469 void sincosf(float __x, float *__sinptr, float *__cosptr) { 470 float __tmp; 471 *__sinptr = 472 __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 473 *__cosptr = __tmp; 474 } 475 476 __DEVICE__ 477 void sincospif(float __x, float *__sinptr, float *__cosptr) { 478 float __tmp; 479 *__sinptr = __ocml_sincospi_f32( 480 __x, (__attribute__((address_space(5))) float *)&__tmp); 481 *__cosptr = __tmp; 482 } 483 484 __DEVICE__ 485 float sinf(float __x) { return __ocml_sin_f32(__x); } 486 487 __DEVICE__ 488 float sinhf(float __x) { return __ocml_sinh_f32(__x); } 489 490 __DEVICE__ 491 float sinpif(float __x) { return __ocml_sinpi_f32(__x); } 492 493 __DEVICE__ 494 float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } 495 496 __DEVICE__ 497 float tanf(float __x) { return __ocml_tan_f32(__x); } 498 499 __DEVICE__ 500 float tanhf(float __x) { return __ocml_tanh_f32(__x); } 501 502 __DEVICE__ 503 float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } 504 505 __DEVICE__ 506 float truncf(float __x) { return __ocml_trunc_f32(__x); } 507 508 __DEVICE__ 509 float y0f(float __x) { return __ocml_y0_f32(__x); } 510 511 __DEVICE__ 512 float y1f(float __x) { return __ocml_y1_f32(__x); } 513 514 __DEVICE__ 515 float ynf(int __n, float __x) { // TODO: we could use Ahmes multiplication 516 // and the Miller & Brown algorithm 517 // for linear recurrences to get O(log n) steps, but it's unclear if 518 // it'd be beneficial in this case. Placeholder until OCML adds 519 // support. 520 if (__n == 0) 521 return y0f(__x); 522 if (__n == 1) 523 return y1f(__x); 524 525 float __x0 = y0f(__x); 526 float __x1 = y1f(__x); 527 for (int __i = 1; __i < __n; ++__i) { 528 float __x2 = (2 * __i) / __x * __x1 - __x0; 529 __x0 = __x1; 530 __x1 = __x2; 531 } 532 533 return __x1; 534 } 535 536 // BEGIN INTRINSICS 537 538 __DEVICE__ 539 float __cosf(float __x) { return __ocml_native_cos_f32(__x); } 540 541 __DEVICE__ 542 float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } 543 544 __DEVICE__ 545 float __expf(float __x) { return __ocml_native_exp_f32(__x); } 546 547 #if defined OCML_BASIC_ROUNDED_OPERATIONS 548 __DEVICE__ 549 float __fadd_rd(float __x, float __y) { return __ocml_add_rtn_f32(__x, __y); } 550 __DEVICE__ 551 float __fadd_rn(float __x, float __y) { return __ocml_add_rte_f32(__x, __y); } 552 __DEVICE__ 553 float __fadd_ru(float __x, float __y) { return __ocml_add_rtp_f32(__x, __y); } 554 __DEVICE__ 555 float __fadd_rz(float __x, float __y) { return __ocml_add_rtz_f32(__x, __y); } 556 #else 557 __DEVICE__ 558 float __fadd_rn(float __x, float __y) { return __x + __y; } 559 #endif 560 561 #if defined OCML_BASIC_ROUNDED_OPERATIONS 562 __DEVICE__ 563 float __fdiv_rd(float __x, float __y) { return __ocml_div_rtn_f32(__x, __y); } 564 __DEVICE__ 565 float __fdiv_rn(float __x, float __y) { return __ocml_div_rte_f32(__x, __y); } 566 __DEVICE__ 567 float __fdiv_ru(float __x, float __y) { return __ocml_div_rtp_f32(__x, __y); } 568 __DEVICE__ 569 float __fdiv_rz(float __x, float __y) { return __ocml_div_rtz_f32(__x, __y); } 570 #else 571 __DEVICE__ 572 float __fdiv_rn(float __x, float __y) { return __x / __y; } 573 #endif 574 575 __DEVICE__ 576 float __fdividef(float __x, float __y) { return __x / __y; } 577 578 #if defined OCML_BASIC_ROUNDED_OPERATIONS 579 __DEVICE__ 580 float __fmaf_rd(float __x, float __y, float __z) { 581 return __ocml_fma_rtn_f32(__x, __y, __z); 582 } 583 __DEVICE__ 584 float __fmaf_rn(float __x, float __y, float __z) { 585 return __ocml_fma_rte_f32(__x, __y, __z); 586 } 587 __DEVICE__ 588 float __fmaf_ru(float __x, float __y, float __z) { 589 return __ocml_fma_rtp_f32(__x, __y, __z); 590 } 591 __DEVICE__ 592 float __fmaf_rz(float __x, float __y, float __z) { 593 return __ocml_fma_rtz_f32(__x, __y, __z); 594 } 595 #else 596 __DEVICE__ 597 float __fmaf_rn(float __x, float __y, float __z) { 598 return __ocml_fma_f32(__x, __y, __z); 599 } 600 #endif 601 602 #if defined OCML_BASIC_ROUNDED_OPERATIONS 603 __DEVICE__ 604 float __fmul_rd(float __x, float __y) { return __ocml_mul_rtn_f32(__x, __y); } 605 __DEVICE__ 606 float __fmul_rn(float __x, float __y) { return __ocml_mul_rte_f32(__x, __y); } 607 __DEVICE__ 608 float __fmul_ru(float __x, float __y) { return __ocml_mul_rtp_f32(__x, __y); } 609 __DEVICE__ 610 float __fmul_rz(float __x, float __y) { return __ocml_mul_rtz_f32(__x, __y); } 611 #else 612 __DEVICE__ 613 float __fmul_rn(float __x, float __y) { return __x * __y; } 614 #endif 615 616 #if defined OCML_BASIC_ROUNDED_OPERATIONS 617 __DEVICE__ 618 float __frcp_rd(float __x) { return __ocml_div_rtn_f32(1.0f, __x); } 619 __DEVICE__ 620 float __frcp_rn(float __x) { return __ocml_div_rte_f32(1.0f, __x); } 621 __DEVICE__ 622 float __frcp_ru(float __x) { return __ocml_div_rtp_f32(1.0f, __x); } 623 __DEVICE__ 624 float __frcp_rz(float __x) { return __ocml_div_rtz_f32(1.0f, __x); } 625 #else 626 __DEVICE__ 627 float __frcp_rn(float __x) { return 1.0f / __x; } 628 #endif 629 630 __DEVICE__ 631 float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } 632 633 #if defined OCML_BASIC_ROUNDED_OPERATIONS 634 __DEVICE__ 635 float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } 636 __DEVICE__ 637 float __fsqrt_rn(float __x) { return __ocml_sqrt_rte_f32(__x); } 638 __DEVICE__ 639 float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } 640 __DEVICE__ 641 float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } 642 #else 643 __DEVICE__ 644 float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } 645 #endif 646 647 #if defined OCML_BASIC_ROUNDED_OPERATIONS 648 __DEVICE__ 649 float __fsub_rd(float __x, float __y) { return __ocml_sub_rtn_f32(__x, __y); } 650 __DEVICE__ 651 float __fsub_rn(float __x, float __y) { return __ocml_sub_rte_f32(__x, __y); } 652 __DEVICE__ 653 float __fsub_ru(float __x, float __y) { return __ocml_sub_rtp_f32(__x, __y); } 654 __DEVICE__ 655 float __fsub_rz(float __x, float __y) { return __ocml_sub_rtz_f32(__x, __y); } 656 #else 657 __DEVICE__ 658 float __fsub_rn(float __x, float __y) { return __x - __y; } 659 #endif 660 661 __DEVICE__ 662 float __log10f(float __x) { return __ocml_native_log10_f32(__x); } 663 664 __DEVICE__ 665 float __log2f(float __x) { return __ocml_native_log2_f32(__x); } 666 667 __DEVICE__ 668 float __logf(float __x) { return __ocml_native_log_f32(__x); } 669 670 __DEVICE__ 671 float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 672 673 __DEVICE__ 674 float __saturatef(float __x) { return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); } 675 676 __DEVICE__ 677 void __sincosf(float __x, float *__sinptr, float *__cosptr) { 678 *__sinptr = __ocml_native_sin_f32(__x); 679 *__cosptr = __ocml_native_cos_f32(__x); 680 } 681 682 __DEVICE__ 683 float __sinf(float __x) { return __ocml_native_sin_f32(__x); } 684 685 __DEVICE__ 686 float __tanf(float __x) { return __ocml_tan_f32(__x); } 687 // END INTRINSICS 688 // END FLOAT 689 690 // BEGIN DOUBLE 691 __DEVICE__ 692 double acos(double __x) { return __ocml_acos_f64(__x); } 693 694 __DEVICE__ 695 double acosh(double __x) { return __ocml_acosh_f64(__x); } 696 697 __DEVICE__ 698 double asin(double __x) { return __ocml_asin_f64(__x); } 699 700 __DEVICE__ 701 double asinh(double __x) { return __ocml_asinh_f64(__x); } 702 703 __DEVICE__ 704 double atan(double __x) { return __ocml_atan_f64(__x); } 705 706 __DEVICE__ 707 double atan2(double __x, double __y) { return __ocml_atan2_f64(__x, __y); } 708 709 __DEVICE__ 710 double atanh(double __x) { return __ocml_atanh_f64(__x); } 711 712 __DEVICE__ 713 double cbrt(double __x) { return __ocml_cbrt_f64(__x); } 714 715 __DEVICE__ 716 double ceil(double __x) { return __ocml_ceil_f64(__x); } 717 718 __DEVICE__ 719 double copysign(double __x, double __y) { 720 return __ocml_copysign_f64(__x, __y); 721 } 722 723 __DEVICE__ 724 double cos(double __x) { return __ocml_cos_f64(__x); } 725 726 __DEVICE__ 727 double cosh(double __x) { return __ocml_cosh_f64(__x); } 728 729 __DEVICE__ 730 double cospi(double __x) { return __ocml_cospi_f64(__x); } 731 732 __DEVICE__ 733 double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } 734 735 __DEVICE__ 736 double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } 737 738 __DEVICE__ 739 double erf(double __x) { return __ocml_erf_f64(__x); } 740 741 __DEVICE__ 742 double erfc(double __x) { return __ocml_erfc_f64(__x); } 743 744 __DEVICE__ 745 double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } 746 747 __DEVICE__ 748 double erfcx(double __x) { return __ocml_erfcx_f64(__x); } 749 750 __DEVICE__ 751 double erfinv(double __x) { return __ocml_erfinv_f64(__x); } 752 753 __DEVICE__ 754 double exp(double __x) { return __ocml_exp_f64(__x); } 755 756 __DEVICE__ 757 double exp10(double __x) { return __ocml_exp10_f64(__x); } 758 759 __DEVICE__ 760 double exp2(double __x) { return __ocml_exp2_f64(__x); } 761 762 __DEVICE__ 763 double expm1(double __x) { return __ocml_expm1_f64(__x); } 764 765 __DEVICE__ 766 double fabs(double __x) { return __ocml_fabs_f64(__x); } 767 768 __DEVICE__ 769 double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } 770 771 __DEVICE__ 772 double floor(double __x) { return __ocml_floor_f64(__x); } 773 774 __DEVICE__ 775 double fma(double __x, double __y, double __z) { 776 return __ocml_fma_f64(__x, __y, __z); 777 } 778 779 __DEVICE__ 780 double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } 781 782 __DEVICE__ 783 double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } 784 785 __DEVICE__ 786 double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } 787 788 __DEVICE__ 789 double frexp(double __x, int *__nptr) { 790 int __tmp; 791 double __r = 792 __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); 793 *__nptr = __tmp; 794 return __r; 795 } 796 797 __DEVICE__ 798 double hypot(double __x, double __y) { return __ocml_hypot_f64(__x, __y); } 799 800 __DEVICE__ 801 int ilogb(double __x) { return __ocml_ilogb_f64(__x); } 802 803 __DEVICE__ 804 __RETURN_TYPE __finite(double __x) { return __ocml_isfinite_f64(__x); } 805 806 __DEVICE__ 807 __RETURN_TYPE __isinf(double __x) { return __ocml_isinf_f64(__x); } 808 809 __DEVICE__ 810 __RETURN_TYPE __isnan(double __x) { return __ocml_isnan_f64(__x); } 811 812 __DEVICE__ 813 double j0(double __x) { return __ocml_j0_f64(__x); } 814 815 __DEVICE__ 816 double j1(double __x) { return __ocml_j1_f64(__x); } 817 818 __DEVICE__ 819 double jn(int __n, double __x) { // TODO: we could use Ahmes multiplication 820 // and the Miller & Brown algorithm 821 // for linear recurrences to get O(log n) steps, but it's unclear if 822 // it'd be beneficial in this case. Placeholder until OCML adds 823 // support. 824 if (__n == 0) 825 return j0(__x); 826 if (__n == 1) 827 return j1(__x); 828 829 double __x0 = j0(__x); 830 double __x1 = j1(__x); 831 for (int __i = 1; __i < __n; ++__i) { 832 double __x2 = (2 * __i) / __x * __x1 - __x0; 833 __x0 = __x1; 834 __x1 = __x2; 835 } 836 return __x1; 837 } 838 839 __DEVICE__ 840 double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); } 841 842 __DEVICE__ 843 double lgamma(double __x) { return __ocml_lgamma_f64(__x); } 844 845 __DEVICE__ 846 long long int llrint(double __x) { return __ocml_rint_f64(__x); } 847 848 __DEVICE__ 849 long long int llround(double __x) { return __ocml_round_f64(__x); } 850 851 __DEVICE__ 852 double log(double __x) { return __ocml_log_f64(__x); } 853 854 __DEVICE__ 855 double log10(double __x) { return __ocml_log10_f64(__x); } 856 857 __DEVICE__ 858 double log1p(double __x) { return __ocml_log1p_f64(__x); } 859 860 __DEVICE__ 861 double log2(double __x) { return __ocml_log2_f64(__x); } 862 863 __DEVICE__ 864 double logb(double __x) { return __ocml_logb_f64(__x); } 865 866 __DEVICE__ 867 long int lrint(double __x) { return __ocml_rint_f64(__x); } 868 869 __DEVICE__ 870 long int lround(double __x) { return __ocml_round_f64(__x); } 871 872 __DEVICE__ 873 double modf(double __x, double *__iptr) { 874 double __tmp; 875 double __r = 876 __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); 877 *__iptr = __tmp; 878 879 return __r; 880 } 881 882 __DEVICE__ 883 double nan(const char *__tagp) { 884 #if !_WIN32 885 union { 886 double val; 887 struct ieee_double { 888 uint64_t mantissa : 51; 889 uint32_t quiet : 1; 890 uint32_t exponent : 11; 891 uint32_t sign : 1; 892 } bits; 893 } __tmp; 894 __static_assert_type_size_equal(sizeof(__tmp.val), sizeof(__tmp.bits)); 895 896 __tmp.bits.sign = 0u; 897 __tmp.bits.exponent = ~0u; 898 __tmp.bits.quiet = 1u; 899 __tmp.bits.mantissa = __make_mantissa(__tagp); 900 901 return __tmp.val; 902 #else 903 __static_assert_type_size_equal(sizeof(uint64_t), sizeof(double)); 904 uint64_t __val = __make_mantissa(__tagp); 905 __val |= 0xFFF << 51; 906 return *reinterpret_cast<double *>(&__val); 907 #endif 908 } 909 910 __DEVICE__ 911 double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); } 912 913 __DEVICE__ 914 double nextafter(double __x, double __y) { 915 return __ocml_nextafter_f64(__x, __y); 916 } 917 918 __DEVICE__ 919 double norm(int __dim, 920 const double *__a) { // TODO: placeholder until OCML adds support. 921 double __r = 0; 922 while (__dim--) { 923 __r += __a[0] * __a[0]; 924 ++__a; 925 } 926 927 return __ocml_sqrt_f64(__r); 928 } 929 930 __DEVICE__ 931 double norm3d(double __x, double __y, double __z) { 932 return __ocml_len3_f64(__x, __y, __z); 933 } 934 935 __DEVICE__ 936 double norm4d(double __x, double __y, double __z, double __w) { 937 return __ocml_len4_f64(__x, __y, __z, __w); 938 } 939 940 __DEVICE__ 941 double normcdf(double __x) { return __ocml_ncdf_f64(__x); } 942 943 __DEVICE__ 944 double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } 945 946 __DEVICE__ 947 double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } 948 949 __DEVICE__ 950 double powi(double __x, int __y) { return __ocml_pown_f64(__x, __y); } 951 952 __DEVICE__ 953 double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } 954 955 __DEVICE__ 956 double remainder(double __x, double __y) { 957 return __ocml_remainder_f64(__x, __y); 958 } 959 960 __DEVICE__ 961 double remquo(double __x, double __y, int *__quo) { 962 int __tmp; 963 double __r = __ocml_remquo_f64( 964 __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 965 *__quo = __tmp; 966 967 return __r; 968 } 969 970 __DEVICE__ 971 double rhypot(double __x, double __y) { return __ocml_rhypot_f64(__x, __y); } 972 973 __DEVICE__ 974 double rint(double __x) { return __ocml_rint_f64(__x); } 975 976 __DEVICE__ 977 double rnorm(int __dim, 978 const double *__a) { // TODO: placeholder until OCML adds support. 979 double __r = 0; 980 while (__dim--) { 981 __r += __a[0] * __a[0]; 982 ++__a; 983 } 984 985 return __ocml_rsqrt_f64(__r); 986 } 987 988 __DEVICE__ 989 double rnorm3d(double __x, double __y, double __z) { 990 return __ocml_rlen3_f64(__x, __y, __z); 991 } 992 993 __DEVICE__ 994 double rnorm4d(double __x, double __y, double __z, double __w) { 995 return __ocml_rlen4_f64(__x, __y, __z, __w); 996 } 997 998 __DEVICE__ 999 double round(double __x) { return __ocml_round_f64(__x); } 1000 1001 __DEVICE__ 1002 double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } 1003 1004 __DEVICE__ 1005 double scalbln(double __x, long int __n) { 1006 return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n) 1007 : __ocml_scalb_f64(__x, __n); 1008 } 1009 __DEVICE__ 1010 double scalbn(double __x, int __n) { return __ocml_scalbn_f64(__x, __n); } 1011 1012 __DEVICE__ 1013 __RETURN_TYPE __signbit(double __x) { return __ocml_signbit_f64(__x); } 1014 1015 __DEVICE__ 1016 double sin(double __x) { return __ocml_sin_f64(__x); } 1017 1018 __DEVICE__ 1019 void sincos(double __x, double *__sinptr, double *__cosptr) { 1020 double __tmp; 1021 *__sinptr = __ocml_sincos_f64( 1022 __x, (__attribute__((address_space(5))) double *)&__tmp); 1023 *__cosptr = __tmp; 1024 } 1025 1026 __DEVICE__ 1027 void sincospi(double __x, double *__sinptr, double *__cosptr) { 1028 double __tmp; 1029 *__sinptr = __ocml_sincospi_f64( 1030 __x, (__attribute__((address_space(5))) double *)&__tmp); 1031 *__cosptr = __tmp; 1032 } 1033 1034 __DEVICE__ 1035 double sinh(double __x) { return __ocml_sinh_f64(__x); } 1036 1037 __DEVICE__ 1038 double sinpi(double __x) { return __ocml_sinpi_f64(__x); } 1039 1040 __DEVICE__ 1041 double sqrt(double __x) { return __ocml_sqrt_f64(__x); } 1042 1043 __DEVICE__ 1044 double tan(double __x) { return __ocml_tan_f64(__x); } 1045 1046 __DEVICE__ 1047 double tanh(double __x) { return __ocml_tanh_f64(__x); } 1048 1049 __DEVICE__ 1050 double tgamma(double __x) { return __ocml_tgamma_f64(__x); } 1051 1052 __DEVICE__ 1053 double trunc(double __x) { return __ocml_trunc_f64(__x); } 1054 1055 __DEVICE__ 1056 double y0(double __x) { return __ocml_y0_f64(__x); } 1057 1058 __DEVICE__ 1059 double y1(double __x) { return __ocml_y1_f64(__x); } 1060 1061 __DEVICE__ 1062 double yn(int __n, double __x) { // TODO: we could use Ahmes multiplication 1063 // and the Miller & Brown algorithm 1064 // for linear recurrences to get O(log n) steps, but it's unclear if 1065 // it'd be beneficial in this case. Placeholder until OCML adds 1066 // support. 1067 if (__n == 0) 1068 return y0(__x); 1069 if (__n == 1) 1070 return y1(__x); 1071 1072 double __x0 = y0(__x); 1073 double __x1 = y1(__x); 1074 for (int __i = 1; __i < __n; ++__i) { 1075 double __x2 = (2 * __i) / __x * __x1 - __x0; 1076 __x0 = __x1; 1077 __x1 = __x2; 1078 } 1079 1080 return __x1; 1081 } 1082 1083 // BEGIN INTRINSICS 1084 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1085 __DEVICE__ 1086 double __dadd_rd(double __x, double __y) { 1087 return __ocml_add_rtn_f64(__x, __y); 1088 } 1089 __DEVICE__ 1090 double __dadd_rn(double __x, double __y) { 1091 return __ocml_add_rte_f64(__x, __y); 1092 } 1093 __DEVICE__ 1094 double __dadd_ru(double __x, double __y) { 1095 return __ocml_add_rtp_f64(__x, __y); 1096 } 1097 __DEVICE__ 1098 double __dadd_rz(double __x, double __y) { 1099 return __ocml_add_rtz_f64(__x, __y); 1100 } 1101 #else 1102 __DEVICE__ 1103 double __dadd_rn(double __x, double __y) { return __x + __y; } 1104 #endif 1105 1106 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1107 __DEVICE__ 1108 double __ddiv_rd(double __x, double __y) { 1109 return __ocml_div_rtn_f64(__x, __y); 1110 } 1111 __DEVICE__ 1112 double __ddiv_rn(double __x, double __y) { 1113 return __ocml_div_rte_f64(__x, __y); 1114 } 1115 __DEVICE__ 1116 double __ddiv_ru(double __x, double __y) { 1117 return __ocml_div_rtp_f64(__x, __y); 1118 } 1119 __DEVICE__ 1120 double __ddiv_rz(double __x, double __y) { 1121 return __ocml_div_rtz_f64(__x, __y); 1122 } 1123 #else 1124 __DEVICE__ 1125 double __ddiv_rn(double __x, double __y) { return __x / __y; } 1126 #endif 1127 1128 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1129 __DEVICE__ 1130 double __dmul_rd(double __x, double __y) { 1131 return __ocml_mul_rtn_f64(__x, __y); 1132 } 1133 __DEVICE__ 1134 double __dmul_rn(double __x, double __y) { 1135 return __ocml_mul_rte_f64(__x, __y); 1136 } 1137 __DEVICE__ 1138 double __dmul_ru(double __x, double __y) { 1139 return __ocml_mul_rtp_f64(__x, __y); 1140 } 1141 __DEVICE__ 1142 double __dmul_rz(double __x, double __y) { 1143 return __ocml_mul_rtz_f64(__x, __y); 1144 } 1145 #else 1146 __DEVICE__ 1147 double __dmul_rn(double __x, double __y) { return __x * __y; } 1148 #endif 1149 1150 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1151 __DEVICE__ 1152 double __drcp_rd(double __x) { return __ocml_div_rtn_f64(1.0, __x); } 1153 __DEVICE__ 1154 double __drcp_rn(double __x) { return __ocml_div_rte_f64(1.0, __x); } 1155 __DEVICE__ 1156 double __drcp_ru(double __x) { return __ocml_div_rtp_f64(1.0, __x); } 1157 __DEVICE__ 1158 double __drcp_rz(double __x) { return __ocml_div_rtz_f64(1.0, __x); } 1159 #else 1160 __DEVICE__ 1161 double __drcp_rn(double __x) { return 1.0 / __x; } 1162 #endif 1163 1164 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1165 __DEVICE__ 1166 double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } 1167 __DEVICE__ 1168 double __dsqrt_rn(double __x) { return __ocml_sqrt_rte_f64(__x); } 1169 __DEVICE__ 1170 double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } 1171 __DEVICE__ 1172 double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } 1173 #else 1174 __DEVICE__ 1175 double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } 1176 #endif 1177 1178 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1179 __DEVICE__ 1180 double __dsub_rd(double __x, double __y) { 1181 return __ocml_sub_rtn_f64(__x, __y); 1182 } 1183 __DEVICE__ 1184 double __dsub_rn(double __x, double __y) { 1185 return __ocml_sub_rte_f64(__x, __y); 1186 } 1187 __DEVICE__ 1188 double __dsub_ru(double __x, double __y) { 1189 return __ocml_sub_rtp_f64(__x, __y); 1190 } 1191 __DEVICE__ 1192 double __dsub_rz(double __x, double __y) { 1193 return __ocml_sub_rtz_f64(__x, __y); 1194 } 1195 #else 1196 __DEVICE__ 1197 double __dsub_rn(double __x, double __y) { return __x - __y; } 1198 #endif 1199 1200 #if defined OCML_BASIC_ROUNDED_OPERATIONS 1201 __DEVICE__ 1202 double __fma_rd(double __x, double __y, double __z) { 1203 return __ocml_fma_rtn_f64(__x, __y, __z); 1204 } 1205 __DEVICE__ 1206 double __fma_rn(double __x, double __y, double __z) { 1207 return __ocml_fma_rte_f64(__x, __y, __z); 1208 } 1209 __DEVICE__ 1210 double __fma_ru(double __x, double __y, double __z) { 1211 return __ocml_fma_rtp_f64(__x, __y, __z); 1212 } 1213 __DEVICE__ 1214 double __fma_rz(double __x, double __y, double __z) { 1215 return __ocml_fma_rtz_f64(__x, __y, __z); 1216 } 1217 #else 1218 __DEVICE__ 1219 double __fma_rn(double __x, double __y, double __z) { 1220 return __ocml_fma_f64(__x, __y, __z); 1221 } 1222 #endif 1223 // END INTRINSICS 1224 // END DOUBLE 1225 1226 // C only macros 1227 #if !defined(__cplusplus) && __STDC_VERSION__ >= 201112L 1228 #define isfinite(__x) _Generic((__x), float : __finitef, double : __finite)(__x) 1229 #define isinf(__x) _Generic((__x), float : __isinff, double : __isinf)(__x) 1230 #define isnan(__x) _Generic((__x), float : __isnanf, double : __isnan)(__x) 1231 #define signbit(__x) \ 1232 _Generic((__x), float : __signbitf, double : __signbit)(__x) 1233 #endif // !defined(__cplusplus) && __STDC_VERSION__ >= 201112L 1234 1235 #if defined(__cplusplus) 1236 template <class T> __DEVICE__ T min(T __arg1, T __arg2) { 1237 return (__arg1 < __arg2) ? __arg1 : __arg2; 1238 } 1239 1240 template <class T> __DEVICE__ T max(T __arg1, T __arg2) { 1241 return (__arg1 > __arg2) ? __arg1 : __arg2; 1242 } 1243 1244 __DEVICE__ int min(int __arg1, int __arg2) { 1245 return (__arg1 < __arg2) ? __arg1 : __arg2; 1246 } 1247 __DEVICE__ int max(int __arg1, int __arg2) { 1248 return (__arg1 > __arg2) ? __arg1 : __arg2; 1249 } 1250 1251 __DEVICE__ 1252 float max(float __x, float __y) { return fmaxf(__x, __y); } 1253 1254 __DEVICE__ 1255 double max(double __x, double __y) { return fmax(__x, __y); } 1256 1257 __DEVICE__ 1258 float min(float __x, float __y) { return fminf(__x, __y); } 1259 1260 __DEVICE__ 1261 double min(double __x, double __y) { return fmin(__x, __y); } 1262 1263 __host__ inline static int min(int __arg1, int __arg2) { 1264 return std::min(__arg1, __arg2); 1265 } 1266 1267 __host__ inline static int max(int __arg1, int __arg2) { 1268 return std::max(__arg1, __arg2); 1269 } 1270 #endif 1271 1272 #pragma pop_macro("__DEVICE__") 1273 #pragma pop_macro("__RETURN_TYPE") 1274 1275 #endif // __CLANG_HIP_MATH_H__ 1276