/*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
 *
 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 * See https://llvm.org/LICENSE.txt for license information.
 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 *
 *===-----------------------------------------------------------------------===
 */
#ifndef __CLANG_CUDA_CMATH_H__
#define __CLANG_CUDA_CMATH_H__
#ifndef __CUDA__
#error "This file is for CUDA compilation only."
#endif

#ifndef __OPENMP_NVPTX__
#include <limits>
#endif

// CUDA lets us use various std math functions on the device side. This file
// works in concert with __clang_cuda_math_forward_declares.h to make that
// possible.
//
// Specifically, the forward-declares header declares __device__ overloads for
// these functions in the global namespace, then pulls them into namespace std
// with 'using' statements. This file then implements those functions, after
// their declarations have been pulled in.
//
// It's important that we declare the functions in the global namespace and pull
// them into namespace std with using statements, as opposed to simply declaring
// these functions in namespace std, because our device functions need to
// overload the standard library functions, which may be declared in the global
// namespace or in std, depending on the degree of conformance of the stdlib
// implementation. Declaring in the global namespace and pulling into namespace
// std covers all of the known knowns.

#ifdef __OPENMP_NVPTX__
#define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
#else
#define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
#endif

__DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
__DEVICE__ long abs(long __n) { return ::labs(__n); }
__DEVICE__ float abs(float __x) { return ::fabsf(__x); }
__DEVICE__ double abs(double __x) { return ::fabs(__x); }
__DEVICE__ float acos(float __x) { return ::acosf(__x); }
__DEVICE__ float asin(float __x) { return ::asinf(__x); }
__DEVICE__ float atan(float __x) { return ::atanf(__x); }
__DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
__DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
__DEVICE__ float cos(float __x) { return ::cosf(__x); }
__DEVICE__ float cosh(float __x) { return ::coshf(__x); }
__DEVICE__ float exp(float __x) { return ::expf(__x); }
__DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
__DEVICE__ float floor(float __x) { return ::floorf(__x); }
__DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
__DEVICE__ int fpclassify(float __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ int fpclassify(double __x) {
  return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
                              FP_ZERO, __x);
}
__DEVICE__ float frexp(float __arg, int *__exp) {
  return ::frexpf(__arg, __exp);
}

// For inscrutable reasons, the CUDA headers define these functions for us on
// Windows.
#if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)

// For OpenMP we work around some old system headers that have non-conforming
// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
// this by providing two versions of these functions, differing only in the
// return type. To avoid conflicting definitions we disable implicit base
// function generation. That means we will end up with two specializations, one
// per type, but only one has a base function defined by the system header.
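// Illustrative sketch (editorial note, not upstream text): on such systems the
// variant machinery below yields two device overloads that differ only in
// return type, e.g.
//   int  isnan(float);  // matches old headers declaring an int-returning isnan
//   bool isnan(float);  // the conforming C++ signature
// and the distinct variant mangling keeps the two definitions from colliding.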
#if defined(__OPENMP_NVPTX__)
#pragma omp begin declare variant match(                                       \
    implementation = {extension(disable_implicit_base)})

// FIXME: We lack an extension to customize the mangling of the variants, e.g.,
//        add a suffix. This means we would clash with the names of the variants
//        (note that we do not create implicit base functions here). To avoid
//        this clash we add a new trait to some of them that is always true
//        (this is LLVM after all ;)). It will only influence the mangled name
//        of the variants inside the inner region and avoid the clash.
#pragma omp begin declare variant match(implementation = {vendor(llvm)})

__DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
__DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ int isnan(double __x) { return ::__isnan(__x); }

#pragma omp end declare variant

#endif

__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
__DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
// For inscrutable reasons, __finite(), the double-precision version of
// __finitef, does not exist when compiling for MacOS. __isfinited is available
// everywhere and is just as good.
__DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }

#if defined(__OPENMP_NVPTX__)
#pragma omp end declare variant
#endif

#endif

__DEVICE__ bool isgreater(float __x, float __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreater(double __x, double __y) {
  return __builtin_isgreater(__x, __y);
}
__DEVICE__ bool isgreaterequal(float __x, float __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isgreaterequal(double __x, double __y) {
  return __builtin_isgreaterequal(__x, __y);
}
__DEVICE__ bool isless(float __x, float __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool isless(double __x, double __y) {
  return __builtin_isless(__x, __y);
}
__DEVICE__ bool islessequal(float __x, float __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessequal(double __x, double __y) {
  return __builtin_islessequal(__x, __y);
}
__DEVICE__ bool islessgreater(float __x, float __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool islessgreater(double __x, double __y) {
  return __builtin_islessgreater(__x, __y);
}
__DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
__DEVICE__ bool isunordered(float __x, float __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ bool isunordered(double __x, double __y) {
  return __builtin_isunordered(__x, __y);
}
__DEVICE__ float ldexp(float __arg, int __exp) {
  return ::ldexpf(__arg, __exp);
}
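// Editorial note: ldexp is the inverse of frexp defined above. For finite
// nonzero x, m = frexp(x, &e) yields |m| in [0.5, 1) with x == ldexp(m, e).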
__DEVICE__ float log(float __x) { return ::logf(__x); }
__DEVICE__ float log10(float __x) { return ::log10f(__x); }
__DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
__DEVICE__ float pow(float __base, float __exp) {
  return ::powf(__base, __exp);
}
__DEVICE__ float pow(float __base, int __iexp) {
  return ::powif(__base, __iexp);
}
__DEVICE__ double pow(double __base, int __iexp) {
  return ::powi(__base, __iexp);
}
__DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
__DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
__DEVICE__ float sin(float __x) { return ::sinf(__x); }
__DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
__DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
__DEVICE__ float tan(float __x) { return ::tanf(__x); }
__DEVICE__ float tanh(float __x) { return ::tanhf(__x); }

// There was a redefinition error for this overload in CUDA mode. We restrict
// it to OpenMP mode for now, as that is where it is actually needed anyway.
#ifdef __OPENMP_NVPTX__
__DEVICE__ float remquo(float __n, float __d, int *__q) {
  return ::remquof(__n, __d, __q);
}
#endif

// Notably missing above is nexttoward. We omit it because
// libdevice doesn't provide an implementation, and we don't want to be in the
// business of implementing tricky libm functions in this header.

#ifndef __OPENMP_NVPTX__

// Now we've defined everything we promised we'd define in
// __clang_cuda_math_forward_declares.h. We need to do two additional things to
// fix up our math functions.
//
// 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define
//    only sin(float) and sin(double), which means that e.g. sin(0) is
//    ambiguous.
//
// 2) Pull the __device__ overloads of "foobarf" math functions into namespace
//    std. These are defined in the CUDA headers in the global namespace,
//    independent of everything else we've done here.

// We can't use std::enable_if, because we want to be pre-C++11 compatible. But
// we go ahead and unconditionally define functions that are only available when
// compiling for C++11 to match the behavior of the CUDA headers.
template <bool __B, class __T = void>
struct __clang_cuda_enable_if {};

template <class __T> struct __clang_cuda_enable_if<true, __T> {
  typedef __T type;
};

// Defines an overload of __fn that accepts one integral argument, calls
// __fn((double)x), and returns __retty.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
  template <typename __T>                                                      \
  __DEVICE__                                                                   \
      typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
                                      __retty>::type                           \
      __fn(__T __x) {                                                          \
    return ::__fn((double)__x);                                                \
  }

// Defines an overload of __fn that accepts two arithmetic arguments, calls
// __fn((double)x, (double)y), and returns __retty.
//
// Note this is different from OVERLOAD_1, which generates an overload that
// accepts only *integral* arguments.
#define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
  template <typename __T1, typename __T2>                                      \
  __DEVICE__ typename __clang_cuda_enable_if<                                  \
      std::numeric_limits<__T1>::is_specialized &&                             \
          std::numeric_limits<__T2>::is_specialized,                           \
      __retty>::type                                                           \
  __fn(__T1 __x, __T2 __y) {                                                   \
    return __fn((double)__x, (double)__y);                                     \
  }
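// Editorial example: __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) below
// expands to a template accepting any integral __T and calling
// ::sin((double)__x). That is what makes a call like sin(0), on its own
// ambiguous between sin(float) and sin(double), well-formed in device code.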
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
__CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);

#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
#undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
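// Editorial example: with the OVERLOAD_2 instantiations above, a device-side
// call such as hypot(3, 4) is well-formed; both int arguments satisfy
// std::numeric_limits<T>::is_specialized, so the generated template ends up
// calling hypot(3.0, 4.0) and yields 5.0 as a double.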
// Overloads for functions that don't match the patterns expected by
// __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
template <typename __T1, typename __T2, typename __T3>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized &&
        std::numeric_limits<__T3>::is_specialized,
    double>::type
fma(__T1 __x, __T2 __y, __T3 __z) {
  return std::fma((double)__x, (double)__y, (double)__z);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
frexp(__T __x, int *__exp) {
  return std::frexp((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
ldexp(__T __x, int __exp) {
  return std::ldexp((double)__x, __exp);
}

template <typename __T1, typename __T2>
__DEVICE__ typename __clang_cuda_enable_if<
    std::numeric_limits<__T1>::is_specialized &&
        std::numeric_limits<__T2>::is_specialized,
    double>::type
remquo(__T1 __x, __T2 __y, int *__quo) {
  return std::remquo((double)__x, (double)__y, __quo);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbln(__T __x, long __exp) {
  return std::scalbln((double)__x, __exp);
}

template <typename __T>
__DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
                                           double>::type
scalbn(__T __x, int __exp) {
  return std::scalbn((double)__x, __exp);
}

// We need to define these overloads in exactly the namespace our standard
// library uses (including the right inline namespace), otherwise they won't be
// picked up by other functions in the standard library (e.g. functions in
// <complex>). Thus the ugliness below.
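// Editorial note (the exact name is ABI-specific): under libc++,
// _LIBCPP_BEGIN_NAMESPACE_STD expands to roughly
//   namespace std { inline namespace __1 {
// If we declared our overloads in plain `namespace std` instead, unqualified
// lookup from code inside std::__1 (e.g. in <complex>) would stop at the
// existing std::__1 declarations and never consider ours.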
#ifdef _LIBCPP_BEGIN_NAMESPACE_STD
_LIBCPP_BEGIN_NAMESPACE_STD
#else
namespace std {
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_BEGIN_NAMESPACE_VERSION
#endif
#endif

// Pull the new overloads we defined above into namespace std.
using ::acos;
using ::acosh;
using ::asin;
using ::asinh;
using ::atan;
using ::atan2;
using ::atanh;
using ::cbrt;
using ::ceil;
using ::copysign;
using ::cos;
using ::cosh;
using ::erf;
using ::erfc;
using ::exp;
using ::exp2;
using ::expm1;
using ::fabs;
using ::fdim;
using ::floor;
using ::fma;
using ::fmax;
using ::fmin;
using ::fmod;
using ::fpclassify;
using ::frexp;
using ::hypot;
using ::ilogb;
using ::isfinite;
using ::isgreater;
using ::isgreaterequal;
using ::isless;
using ::islessequal;
using ::islessgreater;
using ::isnormal;
using ::isunordered;
using ::ldexp;
using ::lgamma;
using ::llrint;
using ::llround;
using ::log;
using ::log10;
using ::log1p;
using ::log2;
using ::logb;
using ::lrint;
using ::lround;
using ::nearbyint;
using ::nextafter;
using ::pow;
using ::remainder;
using ::remquo;
using ::rint;
using ::round;
using ::scalbln;
using ::scalbn;
using ::signbit;
using ::sin;
using ::sinh;
using ::sqrt;
using ::tan;
using ::tanh;
using ::tgamma;
using ::trunc;

// Well this is fun: We need to pull these symbols in for libc++, but we can't
// pull them in with libstdc++, because its ::isinf and ::isnan are different
// from its std::isinf and std::isnan.
#ifndef __GLIBCXX__
using ::isinf;
using ::isnan;
#endif

// Finally, pull the "foobarf" functions that CUDA defines in its headers into
// namespace std.
using ::acosf;
using ::acoshf;
using ::asinf;
using ::asinhf;
using ::atan2f;
using ::atanf;
using ::atanhf;
using ::cbrtf;
using ::ceilf;
using ::copysignf;
using ::cosf;
using ::coshf;
using ::erfcf;
using ::erff;
using ::exp2f;
using ::expf;
using ::expm1f;
using ::fabsf;
using ::fdimf;
using ::floorf;
using ::fmaf;
using ::fmaxf;
using ::fminf;
using ::fmodf;
using ::frexpf;
using ::hypotf;
using ::ilogbf;
using ::ldexpf;
using ::lgammaf;
using ::llrintf;
using ::llroundf;
using ::log10f;
using ::log1pf;
using ::log2f;
using ::logbf;
using ::logf;
using ::lrintf;
using ::lroundf;
using ::modff;
using ::nearbyintf;
using ::nextafterf;
using ::powf;
using ::remainderf;
using ::remquof;
using ::rintf;
using ::roundf;
using ::scalblnf;
using ::scalbnf;
using ::sinf;
using ::sinhf;
using ::sqrtf;
using ::tanf;
using ::tanhf;
using ::tgammaf;
using ::truncf;

#ifdef _LIBCPP_END_NAMESPACE_STD
_LIBCPP_END_NAMESPACE_STD
#else
#ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
_GLIBCXX_END_NAMESPACE_VERSION
#endif
} // namespace std
#endif

#endif // __OPENMP_NVPTX__

#undef __DEVICE__

#endif
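// Editorial usage sketch (not part of the header proper): clang pre-includes
// this file via __clang_cuda_runtime_wrapper.h during CUDA compilation, so
// after `#include <cmath>` device code can simply write, e.g.,
//   __global__ void kernel(double *out) {
//     *out = std::sin(1) + std::hypot(3, 4); // integer overloads from above
//   }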