1 /*===---- __clang_hip_math.h - HIP math decls -------------------------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 10 #ifndef __CLANG_HIP_MATH_H__ 11 #define __CLANG_HIP_MATH_H__ 12 13 #include <algorithm> 14 #include <limits.h> 15 #include <limits> 16 #include <stdint.h> 17 18 #pragma push_macro("__DEVICE__") 19 #pragma push_macro("__RETURN_TYPE") 20 21 // to be consistent with __clang_cuda_math_forward_declares 22 #define __DEVICE__ static __device__ 23 #define __RETURN_TYPE bool 24 25 __DEVICE__ 26 inline uint64_t __make_mantissa_base8(const char *__tagp) { 27 uint64_t __r = 0; 28 while (__tagp) { 29 char __tmp = *__tagp; 30 31 if (__tmp >= '0' && __tmp <= '7') 32 __r = (__r * 8u) + __tmp - '0'; 33 else 34 return 0; 35 36 ++__tagp; 37 } 38 39 return __r; 40 } 41 42 __DEVICE__ 43 inline uint64_t __make_mantissa_base10(const char *__tagp) { 44 uint64_t __r = 0; 45 while (__tagp) { 46 char __tmp = *__tagp; 47 48 if (__tmp >= '0' && __tmp <= '9') 49 __r = (__r * 10u) + __tmp - '0'; 50 else 51 return 0; 52 53 ++__tagp; 54 } 55 56 return __r; 57 } 58 59 __DEVICE__ 60 inline uint64_t __make_mantissa_base16(const char *__tagp) { 61 uint64_t __r = 0; 62 while (__tagp) { 63 char __tmp = *__tagp; 64 65 if (__tmp >= '0' && __tmp <= '9') 66 __r = (__r * 16u) + __tmp - '0'; 67 else if (__tmp >= 'a' && __tmp <= 'f') 68 __r = (__r * 16u) + __tmp - 'a' + 10; 69 else if (__tmp >= 'A' && __tmp <= 'F') 70 __r = (__r * 16u) + __tmp - 'A' + 10; 71 else 72 return 0; 73 74 ++__tagp; 75 } 76 77 return __r; 78 } 79 80 __DEVICE__ 81 inline uint64_t __make_mantissa(const char *__tagp) { 82 if (!__tagp) 83 return 0u; 84 85 if (*__tagp == '0') { 86 ++__tagp; 87 88 if (*__tagp == 'x' || *__tagp == 'X') 89 return __make_mantissa_base16(__tagp); 90 else 91 return __make_mantissa_base8(__tagp); 92 } 93 94 return __make_mantissa_base10(__tagp); 95 } 96 97 // BEGIN FLOAT 98 __DEVICE__ 99 inline float abs(float __x) { return __ocml_fabs_f32(__x); } 100 __DEVICE__ 101 inline float acosf(float __x) { return __ocml_acos_f32(__x); } 102 __DEVICE__ 103 inline float acoshf(float __x) { return __ocml_acosh_f32(__x); } 104 __DEVICE__ 105 inline float asinf(float __x) { return __ocml_asin_f32(__x); } 106 __DEVICE__ 107 inline float asinhf(float __x) { return __ocml_asinh_f32(__x); } 108 __DEVICE__ 109 inline float atan2f(float __x, float __y) { return __ocml_atan2_f32(__x, __y); } 110 __DEVICE__ 111 inline float atanf(float __x) { return __ocml_atan_f32(__x); } 112 __DEVICE__ 113 inline float atanhf(float __x) { return __ocml_atanh_f32(__x); } 114 __DEVICE__ 115 inline float cbrtf(float __x) { return __ocml_cbrt_f32(__x); } 116 __DEVICE__ 117 inline float ceilf(float __x) { return __ocml_ceil_f32(__x); } 118 __DEVICE__ 119 inline float copysignf(float __x, float __y) { 120 return __ocml_copysign_f32(__x, __y); 121 } 122 __DEVICE__ 123 inline float cosf(float __x) { return __ocml_cos_f32(__x); } 124 __DEVICE__ 125 inline float coshf(float __x) { return __ocml_cosh_f32(__x); } 126 __DEVICE__ 127 inline float cospif(float __x) { return __ocml_cospi_f32(__x); } 128 __DEVICE__ 129 inline float cyl_bessel_i0f(float __x) { return __ocml_i0_f32(__x); } 130 __DEVICE__ 131 inline float cyl_bessel_i1f(float __x) { return __ocml_i1_f32(__x); } 132 __DEVICE__ 133 inline float erfcf(float __x) { return __ocml_erfc_f32(__x); } 134 __DEVICE__ 135 inline float erfcinvf(float __x) { return __ocml_erfcinv_f32(__x); } 136 __DEVICE__ 137 inline float erfcxf(float __x) { return __ocml_erfcx_f32(__x); } 138 __DEVICE__ 139 inline float erff(float __x) { return __ocml_erf_f32(__x); } 140 __DEVICE__ 141 inline float erfinvf(float __x) { return __ocml_erfinv_f32(__x); } 142 __DEVICE__ 143 inline float exp10f(float __x) { return __ocml_exp10_f32(__x); } 144 __DEVICE__ 145 inline float exp2f(float __x) { return __ocml_exp2_f32(__x); } 146 __DEVICE__ 147 inline float expf(float __x) { return __ocml_exp_f32(__x); } 148 __DEVICE__ 149 inline float expm1f(float __x) { return __ocml_expm1_f32(__x); } 150 __DEVICE__ 151 inline float fabsf(float __x) { return __ocml_fabs_f32(__x); } 152 __DEVICE__ 153 inline float fdimf(float __x, float __y) { return __ocml_fdim_f32(__x, __y); } 154 __DEVICE__ 155 inline float fdividef(float __x, float __y) { return __x / __y; } 156 __DEVICE__ 157 inline float floorf(float __x) { return __ocml_floor_f32(__x); } 158 __DEVICE__ 159 inline float fmaf(float __x, float __y, float __z) { 160 return __ocml_fma_f32(__x, __y, __z); 161 } 162 __DEVICE__ 163 inline float fmaxf(float __x, float __y) { return __ocml_fmax_f32(__x, __y); } 164 __DEVICE__ 165 inline float fminf(float __x, float __y) { return __ocml_fmin_f32(__x, __y); } 166 __DEVICE__ 167 inline float fmodf(float __x, float __y) { return __ocml_fmod_f32(__x, __y); } 168 __DEVICE__ 169 inline float frexpf(float __x, int *__nptr) { 170 int __tmp; 171 float __r = 172 __ocml_frexp_f32(__x, (__attribute__((address_space(5))) int *)&__tmp); 173 *__nptr = __tmp; 174 175 return __r; 176 } 177 __DEVICE__ 178 inline float hypotf(float __x, float __y) { return __ocml_hypot_f32(__x, __y); } 179 __DEVICE__ 180 inline int ilogbf(float __x) { return __ocml_ilogb_f32(__x); } 181 __DEVICE__ 182 inline __RETURN_TYPE isfinite(float __x) { return __ocml_isfinite_f32(__x); } 183 __DEVICE__ 184 inline __RETURN_TYPE isinf(float __x) { return __ocml_isinf_f32(__x); } 185 __DEVICE__ 186 inline __RETURN_TYPE isnan(float __x) { return __ocml_isnan_f32(__x); } 187 __DEVICE__ 188 inline float j0f(float __x) { return __ocml_j0_f32(__x); } 189 __DEVICE__ 190 inline float j1f(float __x) { return __ocml_j1_f32(__x); } 191 __DEVICE__ 192 inline float jnf(int __n, 193 float __x) { // TODO: we could use Ahmes multiplication 194 // and the Miller & Brown algorithm 195 // for linear recurrences to get O(log n) steps, but it's unclear if 196 // it'd be beneficial in this case. 197 if (__n == 0) 198 return j0f(__x); 199 if (__n == 1) 200 return j1f(__x); 201 202 float __x0 = j0f(__x); 203 float __x1 = j1f(__x); 204 for (int __i = 1; __i < __n; ++__i) { 205 float __x2 = (2 * __i) / __x * __x1 - __x0; 206 __x0 = __x1; 207 __x1 = __x2; 208 } 209 210 return __x1; 211 } 212 __DEVICE__ 213 inline float ldexpf(float __x, int __e) { return __ocml_ldexp_f32(__x, __e); } 214 __DEVICE__ 215 inline float lgammaf(float __x) { return __ocml_lgamma_f32(__x); } 216 __DEVICE__ 217 inline long long int llrintf(float __x) { return __ocml_rint_f32(__x); } 218 __DEVICE__ 219 inline long long int llroundf(float __x) { return __ocml_round_f32(__x); } 220 __DEVICE__ 221 inline float log10f(float __x) { return __ocml_log10_f32(__x); } 222 __DEVICE__ 223 inline float log1pf(float __x) { return __ocml_log1p_f32(__x); } 224 __DEVICE__ 225 inline float log2f(float __x) { return __ocml_log2_f32(__x); } 226 __DEVICE__ 227 inline float logbf(float __x) { return __ocml_logb_f32(__x); } 228 __DEVICE__ 229 inline float logf(float __x) { return __ocml_log_f32(__x); } 230 __DEVICE__ 231 inline long int lrintf(float __x) { return __ocml_rint_f32(__x); } 232 __DEVICE__ 233 inline long int lroundf(float __x) { return __ocml_round_f32(__x); } 234 __DEVICE__ 235 inline float modff(float __x, float *__iptr) { 236 float __tmp; 237 float __r = 238 __ocml_modf_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 239 *__iptr = __tmp; 240 241 return __r; 242 } 243 __DEVICE__ 244 inline float nanf(const char *__tagp) { 245 union { 246 float val; 247 struct ieee_float { 248 uint32_t mantissa : 22; 249 uint32_t quiet : 1; 250 uint32_t exponent : 8; 251 uint32_t sign : 1; 252 } bits; 253 254 static_assert(sizeof(float) == sizeof(ieee_float), ""); 255 } __tmp; 256 257 __tmp.bits.sign = 0u; 258 __tmp.bits.exponent = ~0u; 259 __tmp.bits.quiet = 1u; 260 __tmp.bits.mantissa = __make_mantissa(__tagp); 261 262 return __tmp.val; 263 } 264 __DEVICE__ 265 inline float nearbyintf(float __x) { return __ocml_nearbyint_f32(__x); } 266 __DEVICE__ 267 inline float nextafterf(float __x, float __y) { 268 return __ocml_nextafter_f32(__x, __y); 269 } 270 __DEVICE__ 271 inline float norm3df(float __x, float __y, float __z) { 272 return __ocml_len3_f32(__x, __y, __z); 273 } 274 __DEVICE__ 275 inline float norm4df(float __x, float __y, float __z, float __w) { 276 return __ocml_len4_f32(__x, __y, __z, __w); 277 } 278 __DEVICE__ 279 inline float normcdff(float __x) { return __ocml_ncdf_f32(__x); } 280 __DEVICE__ 281 inline float normcdfinvf(float __x) { return __ocml_ncdfinv_f32(__x); } 282 __DEVICE__ 283 inline float 284 normf(int __dim, 285 const float *__a) { // TODO: placeholder until OCML adds support. 286 float __r = 0; 287 while (__dim--) { 288 __r += __a[0] * __a[0]; 289 ++__a; 290 } 291 292 return __ocml_sqrt_f32(__r); 293 } 294 __DEVICE__ 295 inline float powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 296 __DEVICE__ 297 inline float rcbrtf(float __x) { return __ocml_rcbrt_f32(__x); } 298 __DEVICE__ 299 inline float remainderf(float __x, float __y) { 300 return __ocml_remainder_f32(__x, __y); 301 } 302 __DEVICE__ 303 inline float remquof(float __x, float __y, int *__quo) { 304 int __tmp; 305 float __r = __ocml_remquo_f32( 306 __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 307 *__quo = __tmp; 308 309 return __r; 310 } 311 __DEVICE__ 312 inline float rhypotf(float __x, float __y) { 313 return __ocml_rhypot_f32(__x, __y); 314 } 315 __DEVICE__ 316 inline float rintf(float __x) { return __ocml_rint_f32(__x); } 317 __DEVICE__ 318 inline float rnorm3df(float __x, float __y, float __z) { 319 return __ocml_rlen3_f32(__x, __y, __z); 320 } 321 322 __DEVICE__ 323 inline float rnorm4df(float __x, float __y, float __z, float __w) { 324 return __ocml_rlen4_f32(__x, __y, __z, __w); 325 } 326 __DEVICE__ 327 inline float 328 rnormf(int __dim, 329 const float *__a) { // TODO: placeholder until OCML adds support. 330 float __r = 0; 331 while (__dim--) { 332 __r += __a[0] * __a[0]; 333 ++__a; 334 } 335 336 return __ocml_rsqrt_f32(__r); 337 } 338 __DEVICE__ 339 inline float roundf(float __x) { return __ocml_round_f32(__x); } 340 __DEVICE__ 341 inline float rsqrtf(float __x) { return __ocml_rsqrt_f32(__x); } 342 __DEVICE__ 343 inline float scalblnf(float __x, long int __n) { 344 return (__n < INT_MAX) ? __ocml_scalbn_f32(__x, __n) 345 : __ocml_scalb_f32(__x, __n); 346 } 347 __DEVICE__ 348 inline float scalbnf(float __x, int __n) { return __ocml_scalbn_f32(__x, __n); } 349 __DEVICE__ 350 inline __RETURN_TYPE signbit(float __x) { return __ocml_signbit_f32(__x); } 351 __DEVICE__ 352 inline void sincosf(float __x, float *__sinptr, float *__cosptr) { 353 float __tmp; 354 355 *__sinptr = 356 __ocml_sincos_f32(__x, (__attribute__((address_space(5))) float *)&__tmp); 357 *__cosptr = __tmp; 358 } 359 __DEVICE__ 360 inline void sincospif(float __x, float *__sinptr, float *__cosptr) { 361 float __tmp; 362 363 *__sinptr = __ocml_sincospi_f32( 364 __x, (__attribute__((address_space(5))) float *)&__tmp); 365 *__cosptr = __tmp; 366 } 367 __DEVICE__ 368 inline float sinf(float __x) { return __ocml_sin_f32(__x); } 369 __DEVICE__ 370 inline float sinhf(float __x) { return __ocml_sinh_f32(__x); } 371 __DEVICE__ 372 inline float sinpif(float __x) { return __ocml_sinpi_f32(__x); } 373 __DEVICE__ 374 inline float sqrtf(float __x) { return __ocml_sqrt_f32(__x); } 375 __DEVICE__ 376 inline float tanf(float __x) { return __ocml_tan_f32(__x); } 377 __DEVICE__ 378 inline float tanhf(float __x) { return __ocml_tanh_f32(__x); } 379 __DEVICE__ 380 inline float tgammaf(float __x) { return __ocml_tgamma_f32(__x); } 381 __DEVICE__ 382 inline float truncf(float __x) { return __ocml_trunc_f32(__x); } 383 __DEVICE__ 384 inline float y0f(float __x) { return __ocml_y0_f32(__x); } 385 __DEVICE__ 386 inline float y1f(float __x) { return __ocml_y1_f32(__x); } 387 __DEVICE__ 388 inline float ynf(int __n, 389 float __x) { // TODO: we could use Ahmes multiplication 390 // and the Miller & Brown algorithm 391 // for linear recurrences to get O(log n) steps, but it's unclear if 392 // it'd be beneficial in this case. Placeholder until OCML adds 393 // support. 394 if (__n == 0) 395 return y0f(__x); 396 if (__n == 1) 397 return y1f(__x); 398 399 float __x0 = y0f(__x); 400 float __x1 = y1f(__x); 401 for (int __i = 1; __i < __n; ++__i) { 402 float __x2 = (2 * __i) / __x * __x1 - __x0; 403 __x0 = __x1; 404 __x1 = __x2; 405 } 406 407 return __x1; 408 } 409 410 // BEGIN INTRINSICS 411 __DEVICE__ 412 inline float __cosf(float __x) { return __ocml_native_cos_f32(__x); } 413 __DEVICE__ 414 inline float __exp10f(float __x) { return __ocml_native_exp10_f32(__x); } 415 __DEVICE__ 416 inline float __expf(float __x) { return __ocml_native_exp_f32(__x); } 417 #if defined OCML_BASIC_ROUNDED_OPERATIONS 418 __DEVICE__ 419 inline float __fadd_rd(float __x, float __y) { 420 return __ocml_add_rtn_f32(__x, __y); 421 } 422 #endif 423 __DEVICE__ 424 inline float __fadd_rn(float __x, float __y) { return __x + __y; } 425 #if defined OCML_BASIC_ROUNDED_OPERATIONS 426 __DEVICE__ 427 inline float __fadd_ru(float __x, float __y) { 428 return __ocml_add_rtp_f32(__x, __y); 429 } 430 __DEVICE__ 431 inline float __fadd_rz(float __x, float __y) { 432 return __ocml_add_rtz_f32(__x, __y); 433 } 434 __DEVICE__ 435 inline float __fdiv_rd(float __x, float __y) { 436 return __ocml_div_rtn_f32(__x, __y); 437 } 438 #endif 439 __DEVICE__ 440 inline float __fdiv_rn(float __x, float __y) { return __x / __y; } 441 #if defined OCML_BASIC_ROUNDED_OPERATIONS 442 __DEVICE__ 443 inline float __fdiv_ru(float __x, float __y) { 444 return __ocml_div_rtp_f32(__x, __y); 445 } 446 __DEVICE__ 447 inline float __fdiv_rz(float __x, float __y) { 448 return __ocml_div_rtz_f32(__x, __y); 449 } 450 #endif 451 __DEVICE__ 452 inline float __fdividef(float __x, float __y) { return __x / __y; } 453 #if defined OCML_BASIC_ROUNDED_OPERATIONS 454 __DEVICE__ 455 inline float __fmaf_rd(float __x, float __y, float __z) { 456 return __ocml_fma_rtn_f32(__x, __y, __z); 457 } 458 #endif 459 __DEVICE__ 460 inline float __fmaf_rn(float __x, float __y, float __z) { 461 return __ocml_fma_f32(__x, __y, __z); 462 } 463 #if defined OCML_BASIC_ROUNDED_OPERATIONS 464 __DEVICE__ 465 inline float __fmaf_ru(float __x, float __y, float __z) { 466 return __ocml_fma_rtp_f32(__x, __y, __z); 467 } 468 __DEVICE__ 469 inline float __fmaf_rz(float __x, float __y, float __z) { 470 return __ocml_fma_rtz_f32(__x, __y, __z); 471 } 472 __DEVICE__ 473 inline float __fmul_rd(float __x, float __y) { 474 return __ocml_mul_rtn_f32(__x, __y); 475 } 476 #endif 477 __DEVICE__ 478 inline float __fmul_rn(float __x, float __y) { return __x * __y; } 479 #if defined OCML_BASIC_ROUNDED_OPERATIONS 480 __DEVICE__ 481 inline float __fmul_ru(float __x, float __y) { 482 return __ocml_mul_rtp_f32(__x, __y); 483 } 484 __DEVICE__ 485 inline float __fmul_rz(float __x, float __y) { 486 return __ocml_mul_rtz_f32(__x, __y); 487 } 488 __DEVICE__ 489 inline float __frcp_rd(float __x) { return __llvm_amdgcn_rcp_f32(__x); } 490 #endif 491 __DEVICE__ 492 inline float __frcp_rn(float __x) { return __llvm_amdgcn_rcp_f32(__x); } 493 #if defined OCML_BASIC_ROUNDED_OPERATIONS 494 __DEVICE__ 495 inline float __frcp_ru(float __x) { return __llvm_amdgcn_rcp_f32(__x); } 496 __DEVICE__ 497 inline float __frcp_rz(float __x) { return __llvm_amdgcn_rcp_f32(__x); } 498 #endif 499 __DEVICE__ 500 inline float __frsqrt_rn(float __x) { return __llvm_amdgcn_rsq_f32(__x); } 501 #if defined OCML_BASIC_ROUNDED_OPERATIONS 502 __DEVICE__ 503 inline float __fsqrt_rd(float __x) { return __ocml_sqrt_rtn_f32(__x); } 504 #endif 505 __DEVICE__ 506 inline float __fsqrt_rn(float __x) { return __ocml_native_sqrt_f32(__x); } 507 #if defined OCML_BASIC_ROUNDED_OPERATIONS 508 __DEVICE__ 509 inline float __fsqrt_ru(float __x) { return __ocml_sqrt_rtp_f32(__x); } 510 __DEVICE__ 511 inline float __fsqrt_rz(float __x) { return __ocml_sqrt_rtz_f32(__x); } 512 __DEVICE__ 513 inline float __fsub_rd(float __x, float __y) { 514 return __ocml_sub_rtn_f32(__x, __y); 515 } 516 #endif 517 __DEVICE__ 518 inline float __fsub_rn(float __x, float __y) { return __x - __y; } 519 #if defined OCML_BASIC_ROUNDED_OPERATIONS 520 __DEVICE__ 521 inline float __fsub_ru(float __x, float __y) { 522 return __ocml_sub_rtp_f32(__x, __y); 523 } 524 __DEVICE__ 525 inline float __fsub_rz(float __x, float __y) { 526 return __ocml_sub_rtz_f32(__x, __y); 527 } 528 #endif 529 __DEVICE__ 530 inline float __log10f(float __x) { return __ocml_native_log10_f32(__x); } 531 __DEVICE__ 532 inline float __log2f(float __x) { return __ocml_native_log2_f32(__x); } 533 __DEVICE__ 534 inline float __logf(float __x) { return __ocml_native_log_f32(__x); } 535 __DEVICE__ 536 inline float __powf(float __x, float __y) { return __ocml_pow_f32(__x, __y); } 537 __DEVICE__ 538 inline float __saturatef(float __x) { 539 return (__x < 0) ? 0 : ((__x > 1) ? 1 : __x); 540 } 541 __DEVICE__ 542 inline void __sincosf(float __x, float *__sinptr, float *__cosptr) { 543 *__sinptr = __ocml_native_sin_f32(__x); 544 *__cosptr = __ocml_native_cos_f32(__x); 545 } 546 __DEVICE__ 547 inline float __sinf(float __x) { return __ocml_native_sin_f32(__x); } 548 __DEVICE__ 549 inline float __tanf(float __x) { return __ocml_tan_f32(__x); } 550 // END INTRINSICS 551 // END FLOAT 552 553 // BEGIN DOUBLE 554 __DEVICE__ 555 inline double abs(double __x) { return __ocml_fabs_f64(__x); } 556 __DEVICE__ 557 inline double acos(double __x) { return __ocml_acos_f64(__x); } 558 __DEVICE__ 559 inline double acosh(double __x) { return __ocml_acosh_f64(__x); } 560 __DEVICE__ 561 inline double asin(double __x) { return __ocml_asin_f64(__x); } 562 __DEVICE__ 563 inline double asinh(double __x) { return __ocml_asinh_f64(__x); } 564 __DEVICE__ 565 inline double atan(double __x) { return __ocml_atan_f64(__x); } 566 __DEVICE__ 567 inline double atan2(double __x, double __y) { 568 return __ocml_atan2_f64(__x, __y); 569 } 570 __DEVICE__ 571 inline double atanh(double __x) { return __ocml_atanh_f64(__x); } 572 __DEVICE__ 573 inline double cbrt(double __x) { return __ocml_cbrt_f64(__x); } 574 __DEVICE__ 575 inline double ceil(double __x) { return __ocml_ceil_f64(__x); } 576 __DEVICE__ 577 inline double copysign(double __x, double __y) { 578 return __ocml_copysign_f64(__x, __y); 579 } 580 __DEVICE__ 581 inline double cos(double __x) { return __ocml_cos_f64(__x); } 582 __DEVICE__ 583 inline double cosh(double __x) { return __ocml_cosh_f64(__x); } 584 __DEVICE__ 585 inline double cospi(double __x) { return __ocml_cospi_f64(__x); } 586 __DEVICE__ 587 inline double cyl_bessel_i0(double __x) { return __ocml_i0_f64(__x); } 588 __DEVICE__ 589 inline double cyl_bessel_i1(double __x) { return __ocml_i1_f64(__x); } 590 __DEVICE__ 591 inline double erf(double __x) { return __ocml_erf_f64(__x); } 592 __DEVICE__ 593 inline double erfc(double __x) { return __ocml_erfc_f64(__x); } 594 __DEVICE__ 595 inline double erfcinv(double __x) { return __ocml_erfcinv_f64(__x); } 596 __DEVICE__ 597 inline double erfcx(double __x) { return __ocml_erfcx_f64(__x); } 598 __DEVICE__ 599 inline double erfinv(double __x) { return __ocml_erfinv_f64(__x); } 600 __DEVICE__ 601 inline double exp(double __x) { return __ocml_exp_f64(__x); } 602 __DEVICE__ 603 inline double exp10(double __x) { return __ocml_exp10_f64(__x); } 604 __DEVICE__ 605 inline double exp2(double __x) { return __ocml_exp2_f64(__x); } 606 __DEVICE__ 607 inline double expm1(double __x) { return __ocml_expm1_f64(__x); } 608 __DEVICE__ 609 inline double fabs(double __x) { return __ocml_fabs_f64(__x); } 610 __DEVICE__ 611 inline double fdim(double __x, double __y) { return __ocml_fdim_f64(__x, __y); } 612 __DEVICE__ 613 inline double floor(double __x) { return __ocml_floor_f64(__x); } 614 __DEVICE__ 615 inline double fma(double __x, double __y, double __z) { 616 return __ocml_fma_f64(__x, __y, __z); 617 } 618 __DEVICE__ 619 inline double fmax(double __x, double __y) { return __ocml_fmax_f64(__x, __y); } 620 __DEVICE__ 621 inline double fmin(double __x, double __y) { return __ocml_fmin_f64(__x, __y); } 622 __DEVICE__ 623 inline double fmod(double __x, double __y) { return __ocml_fmod_f64(__x, __y); } 624 __DEVICE__ 625 inline double frexp(double __x, int *__nptr) { 626 int __tmp; 627 double __r = 628 __ocml_frexp_f64(__x, (__attribute__((address_space(5))) int *)&__tmp); 629 *__nptr = __tmp; 630 631 return __r; 632 } 633 __DEVICE__ 634 inline double hypot(double __x, double __y) { 635 return __ocml_hypot_f64(__x, __y); 636 } 637 __DEVICE__ 638 inline int ilogb(double __x) { return __ocml_ilogb_f64(__x); } 639 __DEVICE__ 640 inline __RETURN_TYPE isfinite(double __x) { return __ocml_isfinite_f64(__x); } 641 __DEVICE__ 642 inline __RETURN_TYPE isinf(double __x) { return __ocml_isinf_f64(__x); } 643 __DEVICE__ 644 inline __RETURN_TYPE isnan(double __x) { return __ocml_isnan_f64(__x); } 645 __DEVICE__ 646 inline double j0(double __x) { return __ocml_j0_f64(__x); } 647 __DEVICE__ 648 inline double j1(double __x) { return __ocml_j1_f64(__x); } 649 __DEVICE__ 650 inline double jn(int __n, 651 double __x) { // TODO: we could use Ahmes multiplication 652 // and the Miller & Brown algorithm 653 // for linear recurrences to get O(log n) steps, but it's unclear if 654 // it'd be beneficial in this case. Placeholder until OCML adds 655 // support. 656 if (__n == 0) 657 return j0f(__x); 658 if (__n == 1) 659 return j1f(__x); 660 661 double __x0 = j0f(__x); 662 double __x1 = j1f(__x); 663 for (int __i = 1; __i < __n; ++__i) { 664 double __x2 = (2 * __i) / __x * __x1 - __x0; 665 __x0 = __x1; 666 __x1 = __x2; 667 } 668 669 return __x1; 670 } 671 __DEVICE__ 672 inline double ldexp(double __x, int __e) { return __ocml_ldexp_f64(__x, __e); } 673 __DEVICE__ 674 inline double lgamma(double __x) { return __ocml_lgamma_f64(__x); } 675 __DEVICE__ 676 inline long long int llrint(double __x) { return __ocml_rint_f64(__x); } 677 __DEVICE__ 678 inline long long int llround(double __x) { return __ocml_round_f64(__x); } 679 __DEVICE__ 680 inline double log(double __x) { return __ocml_log_f64(__x); } 681 __DEVICE__ 682 inline double log10(double __x) { return __ocml_log10_f64(__x); } 683 __DEVICE__ 684 inline double log1p(double __x) { return __ocml_log1p_f64(__x); } 685 __DEVICE__ 686 inline double log2(double __x) { return __ocml_log2_f64(__x); } 687 __DEVICE__ 688 inline double logb(double __x) { return __ocml_logb_f64(__x); } 689 __DEVICE__ 690 inline long int lrint(double __x) { return __ocml_rint_f64(__x); } 691 __DEVICE__ 692 inline long int lround(double __x) { return __ocml_round_f64(__x); } 693 __DEVICE__ 694 inline double modf(double __x, double *__iptr) { 695 double __tmp; 696 double __r = 697 __ocml_modf_f64(__x, (__attribute__((address_space(5))) double *)&__tmp); 698 *__iptr = __tmp; 699 700 return __r; 701 } 702 __DEVICE__ 703 inline double nan(const char *__tagp) { 704 #if !_WIN32 705 union { 706 double val; 707 struct ieee_double { 708 uint64_t mantissa : 51; 709 uint32_t quiet : 1; 710 uint32_t exponent : 11; 711 uint32_t sign : 1; 712 } bits; 713 static_assert(sizeof(double) == sizeof(ieee_double), ""); 714 } __tmp; 715 716 __tmp.bits.sign = 0u; 717 __tmp.bits.exponent = ~0u; 718 __tmp.bits.quiet = 1u; 719 __tmp.bits.mantissa = __make_mantissa(__tagp); 720 721 return __tmp.val; 722 #else 723 static_assert(sizeof(uint64_t) == sizeof(double)); 724 uint64_t val = __make_mantissa(__tagp); 725 val |= 0xFFF << 51; 726 return *reinterpret_cast<double *>(&val); 727 #endif 728 } 729 __DEVICE__ 730 inline double nearbyint(double __x) { return __ocml_nearbyint_f64(__x); } 731 __DEVICE__ 732 inline double nextafter(double __x, double __y) { 733 return __ocml_nextafter_f64(__x, __y); 734 } 735 __DEVICE__ 736 inline double 737 norm(int __dim, 738 const double *__a) { // TODO: placeholder until OCML adds support. 739 double __r = 0; 740 while (__dim--) { 741 __r += __a[0] * __a[0]; 742 ++__a; 743 } 744 745 return __ocml_sqrt_f64(__r); 746 } 747 __DEVICE__ 748 inline double norm3d(double __x, double __y, double __z) { 749 return __ocml_len3_f64(__x, __y, __z); 750 } 751 __DEVICE__ 752 inline double norm4d(double __x, double __y, double __z, double __w) { 753 return __ocml_len4_f64(__x, __y, __z, __w); 754 } 755 __DEVICE__ 756 inline double normcdf(double __x) { return __ocml_ncdf_f64(__x); } 757 __DEVICE__ 758 inline double normcdfinv(double __x) { return __ocml_ncdfinv_f64(__x); } 759 __DEVICE__ 760 inline double pow(double __x, double __y) { return __ocml_pow_f64(__x, __y); } 761 __DEVICE__ 762 inline double rcbrt(double __x) { return __ocml_rcbrt_f64(__x); } 763 __DEVICE__ 764 inline double remainder(double __x, double __y) { 765 return __ocml_remainder_f64(__x, __y); 766 } 767 __DEVICE__ 768 inline double remquo(double __x, double __y, int *__quo) { 769 int __tmp; 770 double __r = __ocml_remquo_f64( 771 __x, __y, (__attribute__((address_space(5))) int *)&__tmp); 772 *__quo = __tmp; 773 774 return __r; 775 } 776 __DEVICE__ 777 inline double rhypot(double __x, double __y) { 778 return __ocml_rhypot_f64(__x, __y); 779 } 780 __DEVICE__ 781 inline double rint(double __x) { return __ocml_rint_f64(__x); } 782 __DEVICE__ 783 inline double 784 rnorm(int __dim, 785 const double *__a) { // TODO: placeholder until OCML adds support. 786 double __r = 0; 787 while (__dim--) { 788 __r += __a[0] * __a[0]; 789 ++__a; 790 } 791 792 return __ocml_rsqrt_f64(__r); 793 } 794 __DEVICE__ 795 inline double rnorm3d(double __x, double __y, double __z) { 796 return __ocml_rlen3_f64(__x, __y, __z); 797 } 798 __DEVICE__ 799 inline double rnorm4d(double __x, double __y, double __z, double __w) { 800 return __ocml_rlen4_f64(__x, __y, __z, __w); 801 } 802 __DEVICE__ 803 inline double round(double __x) { return __ocml_round_f64(__x); } 804 __DEVICE__ 805 inline double rsqrt(double __x) { return __ocml_rsqrt_f64(__x); } 806 __DEVICE__ 807 inline double scalbln(double __x, long int __n) { 808 return (__n < INT_MAX) ? __ocml_scalbn_f64(__x, __n) 809 : __ocml_scalb_f64(__x, __n); 810 } 811 __DEVICE__ 812 inline double scalbn(double __x, int __n) { 813 return __ocml_scalbn_f64(__x, __n); 814 } 815 __DEVICE__ 816 inline __RETURN_TYPE signbit(double __x) { return __ocml_signbit_f64(__x); } 817 __DEVICE__ 818 inline double sin(double __x) { return __ocml_sin_f64(__x); } 819 __DEVICE__ 820 inline void sincos(double __x, double *__sinptr, double *__cosptr) { 821 double __tmp; 822 *__sinptr = __ocml_sincos_f64( 823 __x, (__attribute__((address_space(5))) double *)&__tmp); 824 *__cosptr = __tmp; 825 } 826 __DEVICE__ 827 inline void sincospi(double __x, double *__sinptr, double *__cosptr) { 828 double __tmp; 829 *__sinptr = __ocml_sincospi_f64( 830 __x, (__attribute__((address_space(5))) double *)&__tmp); 831 *__cosptr = __tmp; 832 } 833 __DEVICE__ 834 inline double sinh(double __x) { return __ocml_sinh_f64(__x); } 835 __DEVICE__ 836 inline double sinpi(double __x) { return __ocml_sinpi_f64(__x); } 837 __DEVICE__ 838 inline double sqrt(double __x) { return __ocml_sqrt_f64(__x); } 839 __DEVICE__ 840 inline double tan(double __x) { return __ocml_tan_f64(__x); } 841 __DEVICE__ 842 inline double tanh(double __x) { return __ocml_tanh_f64(__x); } 843 __DEVICE__ 844 inline double tgamma(double __x) { return __ocml_tgamma_f64(__x); } 845 __DEVICE__ 846 inline double trunc(double __x) { return __ocml_trunc_f64(__x); } 847 __DEVICE__ 848 inline double y0(double __x) { return __ocml_y0_f64(__x); } 849 __DEVICE__ 850 inline double y1(double __x) { return __ocml_y1_f64(__x); } 851 __DEVICE__ 852 inline double yn(int __n, 853 double __x) { // TODO: we could use Ahmes multiplication 854 // and the Miller & Brown algorithm 855 // for linear recurrences to get O(log n) steps, but it's unclear if 856 // it'd be beneficial in this case. Placeholder until OCML adds 857 // support. 858 if (__n == 0) 859 return j0f(__x); 860 if (__n == 1) 861 return j1f(__x); 862 863 double __x0 = j0f(__x); 864 double __x1 = j1f(__x); 865 for (int __i = 1; __i < __n; ++__i) { 866 double __x2 = (2 * __i) / __x * __x1 - __x0; 867 __x0 = __x1; 868 __x1 = __x2; 869 } 870 871 return __x1; 872 } 873 874 // BEGIN INTRINSICS 875 #if defined OCML_BASIC_ROUNDED_OPERATIONS 876 __DEVICE__ 877 inline double __dadd_rd(double __x, double __y) { 878 return __ocml_add_rtn_f64(__x, __y); 879 } 880 #endif 881 __DEVICE__ 882 inline double __dadd_rn(double __x, double __y) { return __x + __y; } 883 #if defined OCML_BASIC_ROUNDED_OPERATIONS 884 __DEVICE__ 885 inline double __dadd_ru(double __x, double __y) { 886 return __ocml_add_rtp_f64(__x, __y); 887 } 888 __DEVICE__ 889 inline double __dadd_rz(double __x, double __y) { 890 return __ocml_add_rtz_f64(__x, __y); 891 } 892 __DEVICE__ 893 inline double __ddiv_rd(double __x, double __y) { 894 return __ocml_div_rtn_f64(__x, __y); 895 } 896 #endif 897 __DEVICE__ 898 inline double __ddiv_rn(double __x, double __y) { return __x / __y; } 899 #if defined OCML_BASIC_ROUNDED_OPERATIONS 900 __DEVICE__ 901 inline double __ddiv_ru(double __x, double __y) { 902 return __ocml_div_rtp_f64(__x, __y); 903 } 904 __DEVICE__ 905 inline double __ddiv_rz(double __x, double __y) { 906 return __ocml_div_rtz_f64(__x, __y); 907 } 908 __DEVICE__ 909 inline double __dmul_rd(double __x, double __y) { 910 return __ocml_mul_rtn_f64(__x, __y); 911 } 912 #endif 913 __DEVICE__ 914 inline double __dmul_rn(double __x, double __y) { return __x * __y; } 915 #if defined OCML_BASIC_ROUNDED_OPERATIONS 916 __DEVICE__ 917 inline double __dmul_ru(double __x, double __y) { 918 return __ocml_mul_rtp_f64(__x, __y); 919 } 920 __DEVICE__ 921 inline double __dmul_rz(double __x, double __y) { 922 return __ocml_mul_rtz_f64(__x, __y); 923 } 924 __DEVICE__ 925 inline double __drcp_rd(double __x) { return __llvm_amdgcn_rcp_f64(__x); } 926 #endif 927 __DEVICE__ 928 inline double __drcp_rn(double __x) { return __llvm_amdgcn_rcp_f64(__x); } 929 #if defined OCML_BASIC_ROUNDED_OPERATIONS 930 __DEVICE__ 931 inline double __drcp_ru(double __x) { return __llvm_amdgcn_rcp_f64(__x); } 932 __DEVICE__ 933 inline double __drcp_rz(double __x) { return __llvm_amdgcn_rcp_f64(__x); } 934 __DEVICE__ 935 inline double __dsqrt_rd(double __x) { return __ocml_sqrt_rtn_f64(__x); } 936 #endif 937 __DEVICE__ 938 inline double __dsqrt_rn(double __x) { return __ocml_sqrt_f64(__x); } 939 #if defined OCML_BASIC_ROUNDED_OPERATIONS 940 __DEVICE__ 941 inline double __dsqrt_ru(double __x) { return __ocml_sqrt_rtp_f64(__x); } 942 __DEVICE__ 943 inline double __dsqrt_rz(double __x) { return __ocml_sqrt_rtz_f64(__x); } 944 __DEVICE__ 945 inline double __dsub_rd(double __x, double __y) { 946 return __ocml_sub_rtn_f64(__x, __y); 947 } 948 #endif 949 __DEVICE__ 950 inline double __dsub_rn(double __x, double __y) { return __x - __y; } 951 #if defined OCML_BASIC_ROUNDED_OPERATIONS 952 __DEVICE__ 953 inline double __dsub_ru(double __x, double __y) { 954 return __ocml_sub_rtp_f64(__x, __y); 955 } 956 __DEVICE__ 957 inline double __dsub_rz(double __x, double __y) { 958 return __ocml_sub_rtz_f64(__x, __y); 959 } 960 __DEVICE__ 961 inline double __fma_rd(double __x, double __y, double __z) { 962 return __ocml_fma_rtn_f64(__x, __y, __z); 963 } 964 #endif 965 __DEVICE__ 966 inline double __fma_rn(double __x, double __y, double __z) { 967 return __ocml_fma_f64(__x, __y, __z); 968 } 969 #if defined OCML_BASIC_ROUNDED_OPERATIONS 970 __DEVICE__ 971 inline double __fma_ru(double __x, double __y, double __z) { 972 return __ocml_fma_rtp_f64(__x, __y, __z); 973 } 974 __DEVICE__ 975 inline double __fma_rz(double __x, double __y, double __z) { 976 return __ocml_fma_rtz_f64(__x, __y, __z); 977 } 978 #endif 979 // END INTRINSICS 980 // END DOUBLE 981 982 // BEGIN INTEGER 983 __DEVICE__ 984 inline int abs(int __x) { 985 int __sgn = __x >> (sizeof(int) * CHAR_BIT - 1); 986 return (__x ^ __sgn) - __sgn; 987 } 988 __DEVICE__ 989 inline long labs(long __x) { 990 long __sgn = __x >> (sizeof(long) * CHAR_BIT - 1); 991 return (__x ^ __sgn) - __sgn; 992 } 993 __DEVICE__ 994 inline long long llabs(long long __x) { 995 long long __sgn = __x >> (sizeof(long long) * CHAR_BIT - 1); 996 return (__x ^ __sgn) - __sgn; 997 } 998 999 #if defined(__cplusplus) 1000 __DEVICE__ 1001 inline long abs(long __x) { return labs(__x); } 1002 __DEVICE__ 1003 inline long long abs(long long __x) { return llabs(__x); } 1004 #endif 1005 // END INTEGER 1006 1007 __DEVICE__ 1008 inline _Float16 fma(_Float16 __x, _Float16 __y, _Float16 __z) { 1009 return __ocml_fma_f16(__x, __y, __z); 1010 } 1011 1012 __DEVICE__ 1013 inline float fma(float __x, float __y, float __z) { 1014 return fmaf(__x, __y, __z); 1015 } 1016 1017 #pragma push_macro("__DEF_FUN1") 1018 #pragma push_macro("__DEF_FUN2") 1019 #pragma push_macro("__DEF_FUNI") 1020 #pragma push_macro("__DEF_FLOAT_FUN2I") 1021 #pragma push_macro("__HIP_OVERLOAD1") 1022 #pragma push_macro("__HIP_OVERLOAD2") 1023 1024 // __hip_enable_if::type is a type function which returns __T if __B is true. 1025 template <bool __B, class __T = void> struct __hip_enable_if {}; 1026 1027 template <class __T> struct __hip_enable_if<true, __T> { typedef __T type; }; 1028 1029 // __HIP_OVERLOAD1 is used to resolve function calls with integer argument to 1030 // avoid compilation error due to ambibuity. e.g. floor(5) is resolved with 1031 // floor(double). 1032 #define __HIP_OVERLOAD1(__retty, __fn) \ 1033 template <typename __T> \ 1034 __DEVICE__ typename __hip_enable_if<std::numeric_limits<__T>::is_integer, \ 1035 __retty>::type \ 1036 __fn(__T __x) { \ 1037 return ::__fn((double)__x); \ 1038 } 1039 1040 // __HIP_OVERLOAD2 is used to resolve function calls with mixed float/double 1041 // or integer argument to avoid compilation error due to ambibuity. e.g. 1042 // max(5.0f, 6.0) is resolved with max(double, double). 1043 #define __HIP_OVERLOAD2(__retty, __fn) \ 1044 template <typename __T1, typename __T2> \ 1045 __DEVICE__ \ 1046 typename __hip_enable_if<std::numeric_limits<__T1>::is_specialized && \ 1047 std::numeric_limits<__T2>::is_specialized, \ 1048 __retty>::type \ 1049 __fn(__T1 __x, __T2 __y) { \ 1050 return __fn((double)__x, (double)__y); \ 1051 } 1052 1053 // Define cmath functions with float argument and returns float. 1054 #define __DEF_FUN1(__retty, __func) \ 1055 __DEVICE__ \ 1056 inline float __func(float __x) { return __func##f(__x); } \ 1057 __HIP_OVERLOAD1(__retty, __func) 1058 1059 // Define cmath functions with float argument and returns __retty. 1060 #define __DEF_FUNI(__retty, __func) \ 1061 __DEVICE__ \ 1062 inline __retty __func(float __x) { return __func##f(__x); } \ 1063 __HIP_OVERLOAD1(__retty, __func) 1064 1065 // define cmath functions with two float arguments. 1066 #define __DEF_FUN2(__retty, __func) \ 1067 __DEVICE__ \ 1068 inline float __func(float __x, float __y) { return __func##f(__x, __y); } \ 1069 __HIP_OVERLOAD2(__retty, __func) 1070 1071 __DEF_FUN1(double, acos) 1072 __DEF_FUN1(double, acosh) 1073 __DEF_FUN1(double, asin) 1074 __DEF_FUN1(double, asinh) 1075 __DEF_FUN1(double, atan) 1076 __DEF_FUN2(double, atan2); 1077 __DEF_FUN1(double, atanh) 1078 __DEF_FUN1(double, cbrt) 1079 __DEF_FUN1(double, ceil) 1080 __DEF_FUN2(double, copysign); 1081 __DEF_FUN1(double, cos) 1082 __DEF_FUN1(double, cosh) 1083 __DEF_FUN1(double, erf) 1084 __DEF_FUN1(double, erfc) 1085 __DEF_FUN1(double, exp) 1086 __DEF_FUN1(double, exp2) 1087 __DEF_FUN1(double, expm1) 1088 __DEF_FUN1(double, fabs) 1089 __DEF_FUN2(double, fdim); 1090 __DEF_FUN1(double, floor) 1091 __DEF_FUN2(double, fmax); 1092 __DEF_FUN2(double, fmin); 1093 __DEF_FUN2(double, fmod); 1094 //__HIP_OVERLOAD1(int, fpclassify) 1095 __DEF_FUN2(double, hypot); 1096 __DEF_FUNI(int, ilogb) 1097 __HIP_OVERLOAD1(bool, isfinite) 1098 __HIP_OVERLOAD2(bool, isgreater); 1099 __HIP_OVERLOAD2(bool, isgreaterequal); 1100 __HIP_OVERLOAD1(bool, isinf); 1101 __HIP_OVERLOAD2(bool, isless); 1102 __HIP_OVERLOAD2(bool, islessequal); 1103 __HIP_OVERLOAD2(bool, islessgreater); 1104 __HIP_OVERLOAD1(bool, isnan); 1105 //__HIP_OVERLOAD1(bool, isnormal) 1106 __HIP_OVERLOAD2(bool, isunordered); 1107 __DEF_FUN1(double, lgamma) 1108 __DEF_FUN1(double, log) 1109 __DEF_FUN1(double, log10) 1110 __DEF_FUN1(double, log1p) 1111 __DEF_FUN1(double, log2) 1112 __DEF_FUN1(double, logb) 1113 __DEF_FUNI(long long, llrint) 1114 __DEF_FUNI(long long, llround) 1115 __DEF_FUNI(long, lrint) 1116 __DEF_FUNI(long, lround) 1117 __DEF_FUN1(double, nearbyint); 1118 __DEF_FUN2(double, nextafter); 1119 __DEF_FUN2(double, pow); 1120 __DEF_FUN2(double, remainder); 1121 __DEF_FUN1(double, rint); 1122 __DEF_FUN1(double, round); 1123 __HIP_OVERLOAD1(bool, signbit) 1124 __DEF_FUN1(double, sin) 1125 __DEF_FUN1(double, sinh) 1126 __DEF_FUN1(double, sqrt) 1127 __DEF_FUN1(double, tan) 1128 __DEF_FUN1(double, tanh) 1129 __DEF_FUN1(double, tgamma) 1130 __DEF_FUN1(double, trunc); 1131 1132 // define cmath functions with a float and an integer argument. 1133 #define __DEF_FLOAT_FUN2I(__func) \ 1134 __DEVICE__ \ 1135 inline float __func(float __x, int __y) { return __func##f(__x, __y); } 1136 __DEF_FLOAT_FUN2I(scalbn) 1137 1138 template <class T> __DEVICE__ inline T min(T __arg1, T __arg2) { 1139 return (__arg1 < __arg2) ? __arg1 : __arg2; 1140 } 1141 1142 template <class T> __DEVICE__ inline T max(T __arg1, T __arg2) { 1143 return (__arg1 > __arg2) ? __arg1 : __arg2; 1144 } 1145 1146 __DEVICE__ inline int min(int __arg1, int __arg2) { 1147 return (__arg1 < __arg2) ? __arg1 : __arg2; 1148 } 1149 __DEVICE__ inline int max(int __arg1, int __arg2) { 1150 return (__arg1 > __arg2) ? __arg1 : __arg2; 1151 } 1152 1153 __DEVICE__ 1154 inline float max(float __x, float __y) { return fmaxf(__x, __y); } 1155 1156 __DEVICE__ 1157 inline double max(double __x, double __y) { return fmax(__x, __y); } 1158 1159 __DEVICE__ 1160 inline float min(float __x, float __y) { return fminf(__x, __y); } 1161 1162 __DEVICE__ 1163 inline double min(double __x, double __y) { return fmin(__x, __y); } 1164 1165 __HIP_OVERLOAD2(double, max) 1166 __HIP_OVERLOAD2(double, min) 1167 1168 __host__ inline static int min(int __arg1, int __arg2) { 1169 return std::min(__arg1, __arg2); 1170 } 1171 1172 __host__ inline static int max(int __arg1, int __arg2) { 1173 return std::max(__arg1, __arg2); 1174 } 1175 1176 #pragma pop_macro("__DEF_FUN1") 1177 #pragma pop_macro("__DEF_FUN2") 1178 #pragma pop_macro("__DEF_FUNI") 1179 #pragma pop_macro("__DEF_FLOAT_FUN2I") 1180 #pragma pop_macro("__HIP_OVERLOAD1") 1181 #pragma pop_macro("__HIP_OVERLOAD2") 1182 #pragma pop_macro("__DEVICE__") 1183 #pragma pop_macro("__RETURN_TYPE") 1184 1185 #endif // __CLANG_HIP_MATH_H__ 1186