1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 #ifndef __CLANG_CUDA_CMATH_H__ 10 #define __CLANG_CUDA_CMATH_H__ 11 #ifndef __CUDA__ 12 #error "This file is for CUDA compilation only." 13 #endif 14 15 #include <limits> 16 17 // CUDA lets us use various std math functions on the device side. This file 18 // works in concert with __clang_cuda_math_forward_declares.h to make this work. 19 // 20 // Specifically, the forward-declares header declares __device__ overloads for 21 // these functions in the global namespace, then pulls them into namespace std 22 // with 'using' statements. Then this file implements those functions, after 23 // their implementations have been pulled in. 24 // 25 // It's important that we declare the functions in the global namespace and pull 26 // them into namespace std with using statements, as opposed to simply declaring 27 // these functions in namespace std, because our device functions need to 28 // overload the standard library functions, which may be declared in the global 29 // namespace or in std, depending on the degree of conformance of the stdlib 30 // implementation. Declaring in the global namespace and pulling into namespace 31 // std covers all of the known knowns. 32 33 #ifdef _OPENMP 34 #define __DEVICE__ static __attribute__((always_inline)) 35 #else 36 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) 37 #endif 38 39 // For C++ 17 we need to include noexcept attribute to be compatible 40 // with the header-defined version. This may be removed once 41 // variant is supported. 42 #if defined(_OPENMP) && defined(__cplusplus) && __cplusplus >= 201703L 43 #define __NOEXCEPT noexcept 44 #else 45 #define __NOEXCEPT 46 #endif 47 48 #if !(defined(_OPENMP) && defined(__cplusplus)) 49 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } 50 __DEVICE__ long abs(long __n) { return ::labs(__n); } 51 __DEVICE__ float abs(float __x) { return ::fabsf(__x); } 52 __DEVICE__ double abs(double __x) { return ::fabs(__x); } 53 #endif 54 // TODO: remove once variat is supported. 55 #if defined(_OPENMP) && defined(__cplusplus) 56 __DEVICE__ const float abs(const float __x) { return ::fabsf((float)__x); } 57 __DEVICE__ const double abs(const double __x) { return ::fabs((double)__x); } 58 #endif 59 __DEVICE__ float acos(float __x) { return ::acosf(__x); } 60 __DEVICE__ float asin(float __x) { return ::asinf(__x); } 61 __DEVICE__ float atan(float __x) { return ::atanf(__x); } 62 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } 63 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); } 64 __DEVICE__ float cos(float __x) { return ::cosf(__x); } 65 __DEVICE__ float cosh(float __x) { return ::coshf(__x); } 66 __DEVICE__ float exp(float __x) { return ::expf(__x); } 67 __DEVICE__ float fabs(float __x) __NOEXCEPT { return ::fabsf(__x); } 68 __DEVICE__ float floor(float __x) { return ::floorf(__x); } 69 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } 70 // TODO: remove when variant is supported 71 #ifndef _OPENMP 72 __DEVICE__ int fpclassify(float __x) { 73 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, 74 FP_ZERO, __x); 75 } 76 __DEVICE__ int fpclassify(double __x) { 77 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, 78 FP_ZERO, __x); 79 } 80 #endif 81 __DEVICE__ float frexp(float __arg, int *__exp) { 82 return ::frexpf(__arg, __exp); 83 } 84 85 // For inscrutable reasons, the CUDA headers define these functions for us on 86 // Windows. 87 #ifndef _MSC_VER 88 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } 89 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } 90 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } 91 // For inscrutable reasons, __finite(), the double-precision version of 92 // __finitef, does not exist when compiling for MacOS. __isfinited is available 93 // everywhere and is just as good. 94 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } 95 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } 96 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } 97 #endif 98 99 __DEVICE__ bool isgreater(float __x, float __y) { 100 return __builtin_isgreater(__x, __y); 101 } 102 __DEVICE__ bool isgreater(double __x, double __y) { 103 return __builtin_isgreater(__x, __y); 104 } 105 __DEVICE__ bool isgreaterequal(float __x, float __y) { 106 return __builtin_isgreaterequal(__x, __y); 107 } 108 __DEVICE__ bool isgreaterequal(double __x, double __y) { 109 return __builtin_isgreaterequal(__x, __y); 110 } 111 __DEVICE__ bool isless(float __x, float __y) { 112 return __builtin_isless(__x, __y); 113 } 114 __DEVICE__ bool isless(double __x, double __y) { 115 return __builtin_isless(__x, __y); 116 } 117 __DEVICE__ bool islessequal(float __x, float __y) { 118 return __builtin_islessequal(__x, __y); 119 } 120 __DEVICE__ bool islessequal(double __x, double __y) { 121 return __builtin_islessequal(__x, __y); 122 } 123 __DEVICE__ bool islessgreater(float __x, float __y) { 124 return __builtin_islessgreater(__x, __y); 125 } 126 __DEVICE__ bool islessgreater(double __x, double __y) { 127 return __builtin_islessgreater(__x, __y); 128 } 129 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } 130 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } 131 __DEVICE__ bool isunordered(float __x, float __y) { 132 return __builtin_isunordered(__x, __y); 133 } 134 __DEVICE__ bool isunordered(double __x, double __y) { 135 return __builtin_isunordered(__x, __y); 136 } 137 __DEVICE__ float ldexp(float __arg, int __exp) { 138 return ::ldexpf(__arg, __exp); 139 } 140 __DEVICE__ float log(float __x) { return ::logf(__x); } 141 __DEVICE__ float log10(float __x) { return ::log10f(__x); } 142 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } 143 __DEVICE__ float pow(float __base, float __exp) { 144 return ::powf(__base, __exp); 145 } 146 __DEVICE__ float pow(float __base, int __iexp) { 147 return ::powif(__base, __iexp); 148 } 149 __DEVICE__ double pow(double __base, int __iexp) { 150 return ::powi(__base, __iexp); 151 } 152 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } 153 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); } 154 __DEVICE__ float sin(float __x) { return ::sinf(__x); } 155 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); } 156 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } 157 __DEVICE__ float tan(float __x) { return ::tanf(__x); } 158 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); } 159 160 // Notably missing above is nexttoward. We omit it because 161 // libdevice doesn't provide an implementation, and we don't want to be in the 162 // business of implementing tricky libm functions in this header. 163 164 // Now we've defined everything we promised we'd define in 165 // __clang_cuda_math_forward_declares.h. We need to do two additional things to 166 // fix up our math functions. 167 // 168 // 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define 169 // only sin(float) and sin(double), which means that e.g. sin(0) is 170 // ambiguous. 171 // 172 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace 173 // std. These are defined in the CUDA headers in the global namespace, 174 // independent of everything else we've done here. 175 176 // We can't use std::enable_if, because we want to be pre-C++11 compatible. But 177 // we go ahead and unconditionally define functions that are only available when 178 // compiling for C++11 to match the behavior of the CUDA headers. 179 template<bool __B, class __T = void> 180 struct __clang_cuda_enable_if {}; 181 182 template <class __T> struct __clang_cuda_enable_if<true, __T> { 183 typedef __T type; 184 }; 185 186 // Defines an overload of __fn that accepts one integral argument, calls 187 // __fn((double)x), and returns __retty. 188 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \ 189 template <typename __T> \ 190 __DEVICE__ \ 191 typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \ 192 __retty>::type \ 193 __fn(__T __x) { \ 194 return ::__fn((double)__x); \ 195 } 196 197 // Defines an overload of __fn that accepts one two arithmetic arguments, calls 198 // __fn((double)x, (double)y), and returns a double. 199 // 200 // Note this is different from OVERLOAD_1, which generates an overload that 201 // accepts only *integral* arguments. 202 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ 203 template <typename __T1, typename __T2> \ 204 __DEVICE__ typename __clang_cuda_enable_if< \ 205 std::numeric_limits<__T1>::is_specialized && \ 206 std::numeric_limits<__T2>::is_specialized, \ 207 __retty>::type \ 208 __fn(__T1 __x, __T2 __y) { \ 209 return __fn((double)__x, (double)__y); \ 210 } 211 212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos) 213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh) 214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin) 215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh) 216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan) 217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2); 218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh) 219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt) 220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil) 221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign); 222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos) 223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh) 224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf) 225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc) 226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp) 227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2) 228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1) 229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs) 230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim); 231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor) 232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax); 233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin); 234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod); 235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify) 236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot); 237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb) 238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite) 239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater); 240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal); 241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf); 242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless); 243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal); 244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater); 245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan); 246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal) 247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered); 248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma) 249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log) 250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10) 251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p) 252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2) 253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb) 254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint) 255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround) 256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint) 257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround) 258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint); 259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter); 260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow); 261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder); 262 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint); 263 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round); 264 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit) 265 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) 266 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh) 267 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt) 268 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan) 269 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh) 270 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma) 271 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc); 272 273 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1 274 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2 275 276 // Overloads for functions that don't match the patterns expected by 277 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}. 278 template <typename __T1, typename __T2, typename __T3> 279 __DEVICE__ typename __clang_cuda_enable_if< 280 std::numeric_limits<__T1>::is_specialized && 281 std::numeric_limits<__T2>::is_specialized && 282 std::numeric_limits<__T3>::is_specialized, 283 double>::type 284 fma(__T1 __x, __T2 __y, __T3 __z) { 285 return std::fma((double)__x, (double)__y, (double)__z); 286 } 287 288 template <typename __T> 289 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 290 double>::type 291 frexp(__T __x, int *__exp) { 292 return std::frexp((double)__x, __exp); 293 } 294 295 template <typename __T> 296 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 297 double>::type 298 ldexp(__T __x, int __exp) { 299 return std::ldexp((double)__x, __exp); 300 } 301 302 template <typename __T1, typename __T2> 303 __DEVICE__ typename __clang_cuda_enable_if< 304 std::numeric_limits<__T1>::is_specialized && 305 std::numeric_limits<__T2>::is_specialized, 306 double>::type 307 remquo(__T1 __x, __T2 __y, int *__quo) { 308 return std::remquo((double)__x, (double)__y, __quo); 309 } 310 311 template <typename __T> 312 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 313 double>::type 314 scalbln(__T __x, long __exp) { 315 return std::scalbln((double)__x, __exp); 316 } 317 318 template <typename __T> 319 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 320 double>::type 321 scalbn(__T __x, int __exp) { 322 return std::scalbn((double)__x, __exp); 323 } 324 325 // We need to define these overloads in exactly the namespace our standard 326 // library uses (including the right inline namespace), otherwise they won't be 327 // picked up by other functions in the standard library (e.g. functions in 328 // <complex>). Thus the ugliness below. 329 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD 330 _LIBCPP_BEGIN_NAMESPACE_STD 331 #else 332 namespace std { 333 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 334 _GLIBCXX_BEGIN_NAMESPACE_VERSION 335 #endif 336 #endif 337 338 // Pull the new overloads we defined above into namespace std. 339 using ::acos; 340 using ::acosh; 341 using ::asin; 342 using ::asinh; 343 using ::atan; 344 using ::atan2; 345 using ::atanh; 346 using ::cbrt; 347 using ::ceil; 348 using ::copysign; 349 using ::cos; 350 using ::cosh; 351 using ::erf; 352 using ::erfc; 353 using ::exp; 354 using ::exp2; 355 using ::expm1; 356 using ::fabs; 357 using ::fdim; 358 using ::floor; 359 using ::fma; 360 using ::fmax; 361 using ::fmin; 362 using ::fmod; 363 using ::fpclassify; 364 using ::frexp; 365 using ::hypot; 366 using ::ilogb; 367 using ::isfinite; 368 using ::isgreater; 369 using ::isgreaterequal; 370 using ::isless; 371 using ::islessequal; 372 using ::islessgreater; 373 using ::isnormal; 374 using ::isunordered; 375 using ::ldexp; 376 using ::lgamma; 377 using ::llrint; 378 using ::llround; 379 using ::log; 380 using ::log10; 381 using ::log1p; 382 using ::log2; 383 using ::logb; 384 using ::lrint; 385 using ::lround; 386 using ::nearbyint; 387 using ::nextafter; 388 using ::pow; 389 using ::remainder; 390 using ::remquo; 391 using ::rint; 392 using ::round; 393 using ::scalbln; 394 using ::scalbn; 395 using ::signbit; 396 using ::sin; 397 using ::sinh; 398 using ::sqrt; 399 using ::tan; 400 using ::tanh; 401 using ::tgamma; 402 using ::trunc; 403 404 // Well this is fun: We need to pull these symbols in for libc++, but we can't 405 // pull them in with libstdc++, because its ::isinf and ::isnan are different 406 // than its std::isinf and std::isnan. 407 #ifndef __GLIBCXX__ 408 using ::isinf; 409 using ::isnan; 410 #endif 411 412 // Finally, pull the "foobarf" functions that CUDA defines in its headers into 413 // namespace std. 414 using ::acosf; 415 using ::acoshf; 416 using ::asinf; 417 using ::asinhf; 418 using ::atan2f; 419 using ::atanf; 420 using ::atanhf; 421 using ::cbrtf; 422 using ::ceilf; 423 using ::copysignf; 424 using ::cosf; 425 using ::coshf; 426 using ::erfcf; 427 using ::erff; 428 using ::exp2f; 429 using ::expf; 430 using ::expm1f; 431 using ::fabsf; 432 using ::fdimf; 433 using ::floorf; 434 using ::fmaf; 435 using ::fmaxf; 436 using ::fminf; 437 using ::fmodf; 438 using ::frexpf; 439 using ::hypotf; 440 using ::ilogbf; 441 using ::ldexpf; 442 using ::lgammaf; 443 using ::llrintf; 444 using ::llroundf; 445 using ::log10f; 446 using ::log1pf; 447 using ::log2f; 448 using ::logbf; 449 using ::logf; 450 using ::lrintf; 451 using ::lroundf; 452 using ::modff; 453 using ::nearbyintf; 454 using ::nextafterf; 455 using ::powf; 456 using ::remainderf; 457 using ::remquof; 458 using ::rintf; 459 using ::roundf; 460 // TODO: remove once variant is supported 461 #ifndef _OPENMP 462 using ::scalblnf; 463 #endif 464 using ::scalbnf; 465 using ::sinf; 466 using ::sinhf; 467 using ::sqrtf; 468 using ::tanf; 469 using ::tanhf; 470 using ::tgammaf; 471 using ::truncf; 472 473 #ifdef _LIBCPP_END_NAMESPACE_STD 474 _LIBCPP_END_NAMESPACE_STD 475 #else 476 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 477 _GLIBCXX_END_NAMESPACE_VERSION 478 #endif 479 } // namespace std 480 #endif 481 482 #undef __NOEXCEPT 483 #undef __DEVICE__ 484 485 #endif 486