1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------=== 2 * 3 * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. 4 * See https://llvm.org/LICENSE.txt for license information. 5 * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 6 * 7 *===-----------------------------------------------------------------------=== 8 */ 9 #ifndef __CLANG_CUDA_CMATH_H__ 10 #define __CLANG_CUDA_CMATH_H__ 11 #ifndef __CUDA__ 12 #error "This file is for CUDA compilation only." 13 #endif 14 15 #ifndef __OPENMP_NVPTX__ 16 #include <limits> 17 #endif 18 19 // CUDA lets us use various std math functions on the device side. This file 20 // works in concert with __clang_cuda_math_forward_declares.h to make this work. 21 // 22 // Specifically, the forward-declares header declares __device__ overloads for 23 // these functions in the global namespace, then pulls them into namespace std 24 // with 'using' statements. Then this file implements those functions, after 25 // their implementations have been pulled in. 26 // 27 // It's important that we declare the functions in the global namespace and pull 28 // them into namespace std with using statements, as opposed to simply declaring 29 // these functions in namespace std, because our device functions need to 30 // overload the standard library functions, which may be declared in the global 31 // namespace or in std, depending on the degree of conformance of the stdlib 32 // implementation. Declaring in the global namespace and pulling into namespace 33 // std covers all of the known knowns. 34 35 #ifdef __OPENMP_NVPTX__ 36 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow)) 37 #else 38 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline)) 39 #endif 40 41 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); } 42 __DEVICE__ long abs(long __n) { return ::labs(__n); } 43 __DEVICE__ float abs(float __x) { return ::fabsf(__x); } 44 __DEVICE__ double abs(double __x) { return ::fabs(__x); } 45 __DEVICE__ float acos(float __x) { return ::acosf(__x); } 46 __DEVICE__ float asin(float __x) { return ::asinf(__x); } 47 __DEVICE__ float atan(float __x) { return ::atanf(__x); } 48 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); } 49 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); } 50 __DEVICE__ float cos(float __x) { return ::cosf(__x); } 51 __DEVICE__ float cosh(float __x) { return ::coshf(__x); } 52 __DEVICE__ float exp(float __x) { return ::expf(__x); } 53 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); } 54 __DEVICE__ float floor(float __x) { return ::floorf(__x); } 55 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); } 56 __DEVICE__ int fpclassify(float __x) { 57 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, 58 FP_ZERO, __x); 59 } 60 __DEVICE__ int fpclassify(double __x) { 61 return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL, 62 FP_ZERO, __x); 63 } 64 __DEVICE__ float frexp(float __arg, int *__exp) { 65 return ::frexpf(__arg, __exp); 66 } 67 68 // For inscrutable reasons, the CUDA headers define these functions for us on 69 // Windows. For OpenMP we omit these as some old system headers have 70 // non-conforming `isinf(float)` and `isnan(float)` implementations that return 71 // an `int`. The system versions of these functions should be fine anyway. 72 #if !defined(_MSC_VER) && !defined(__OPENMP_NVPTX__) 73 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } 74 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } 75 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } 76 // For inscrutable reasons, __finite(), the double-precision version of 77 // __finitef, does not exist when compiling for MacOS. __isfinited is available 78 // everywhere and is just as good. 79 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); } 80 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } 81 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } 82 #endif 83 84 __DEVICE__ bool isgreater(float __x, float __y) { 85 return __builtin_isgreater(__x, __y); 86 } 87 __DEVICE__ bool isgreater(double __x, double __y) { 88 return __builtin_isgreater(__x, __y); 89 } 90 __DEVICE__ bool isgreaterequal(float __x, float __y) { 91 return __builtin_isgreaterequal(__x, __y); 92 } 93 __DEVICE__ bool isgreaterequal(double __x, double __y) { 94 return __builtin_isgreaterequal(__x, __y); 95 } 96 __DEVICE__ bool isless(float __x, float __y) { 97 return __builtin_isless(__x, __y); 98 } 99 __DEVICE__ bool isless(double __x, double __y) { 100 return __builtin_isless(__x, __y); 101 } 102 __DEVICE__ bool islessequal(float __x, float __y) { 103 return __builtin_islessequal(__x, __y); 104 } 105 __DEVICE__ bool islessequal(double __x, double __y) { 106 return __builtin_islessequal(__x, __y); 107 } 108 __DEVICE__ bool islessgreater(float __x, float __y) { 109 return __builtin_islessgreater(__x, __y); 110 } 111 __DEVICE__ bool islessgreater(double __x, double __y) { 112 return __builtin_islessgreater(__x, __y); 113 } 114 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } 115 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } 116 __DEVICE__ bool isunordered(float __x, float __y) { 117 return __builtin_isunordered(__x, __y); 118 } 119 __DEVICE__ bool isunordered(double __x, double __y) { 120 return __builtin_isunordered(__x, __y); 121 } 122 __DEVICE__ float ldexp(float __arg, int __exp) { 123 return ::ldexpf(__arg, __exp); 124 } 125 __DEVICE__ float log(float __x) { return ::logf(__x); } 126 __DEVICE__ float log10(float __x) { return ::log10f(__x); } 127 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); } 128 __DEVICE__ float pow(float __base, float __exp) { 129 return ::powf(__base, __exp); 130 } 131 __DEVICE__ float pow(float __base, int __iexp) { 132 return ::powif(__base, __iexp); 133 } 134 __DEVICE__ double pow(double __base, int __iexp) { 135 return ::powi(__base, __iexp); 136 } 137 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); } 138 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); } 139 __DEVICE__ float sin(float __x) { return ::sinf(__x); } 140 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); } 141 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); } 142 __DEVICE__ float tan(float __x) { return ::tanf(__x); } 143 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); } 144 145 // Notably missing above is nexttoward. We omit it because 146 // libdevice doesn't provide an implementation, and we don't want to be in the 147 // business of implementing tricky libm functions in this header. 148 149 #ifndef __OPENMP_NVPTX__ 150 151 // Now we've defined everything we promised we'd define in 152 // __clang_cuda_math_forward_declares.h. We need to do two additional things to 153 // fix up our math functions. 154 // 155 // 1) Define __device__ overloads for e.g. sin(int). The CUDA headers define 156 // only sin(float) and sin(double), which means that e.g. sin(0) is 157 // ambiguous. 158 // 159 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace 160 // std. These are defined in the CUDA headers in the global namespace, 161 // independent of everything else we've done here. 162 163 // We can't use std::enable_if, because we want to be pre-C++11 compatible. But 164 // we go ahead and unconditionally define functions that are only available when 165 // compiling for C++11 to match the behavior of the CUDA headers. 166 template<bool __B, class __T = void> 167 struct __clang_cuda_enable_if {}; 168 169 template <class __T> struct __clang_cuda_enable_if<true, __T> { 170 typedef __T type; 171 }; 172 173 // Defines an overload of __fn that accepts one integral argument, calls 174 // __fn((double)x), and returns __retty. 175 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn) \ 176 template <typename __T> \ 177 __DEVICE__ \ 178 typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, \ 179 __retty>::type \ 180 __fn(__T __x) { \ 181 return ::__fn((double)__x); \ 182 } 183 184 // Defines an overload of __fn that accepts one two arithmetic arguments, calls 185 // __fn((double)x, (double)y), and returns a double. 186 // 187 // Note this is different from OVERLOAD_1, which generates an overload that 188 // accepts only *integral* arguments. 189 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn) \ 190 template <typename __T1, typename __T2> \ 191 __DEVICE__ typename __clang_cuda_enable_if< \ 192 std::numeric_limits<__T1>::is_specialized && \ 193 std::numeric_limits<__T2>::is_specialized, \ 194 __retty>::type \ 195 __fn(__T1 __x, __T2 __y) { \ 196 return __fn((double)__x, (double)__y); \ 197 } 198 199 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos) 200 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh) 201 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin) 202 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh) 203 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan) 204 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2); 205 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh) 206 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt) 207 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil) 208 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign); 209 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos) 210 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh) 211 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf) 212 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc) 213 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp) 214 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2) 215 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1) 216 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs) 217 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim); 218 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor) 219 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax); 220 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin); 221 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod); 222 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify) 223 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot); 224 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb) 225 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite) 226 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater); 227 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal); 228 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf); 229 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless); 230 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal); 231 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater); 232 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan); 233 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal) 234 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered); 235 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma) 236 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log) 237 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10) 238 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p) 239 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2) 240 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb) 241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint) 242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround) 243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint) 244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround) 245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint); 246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter); 247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow); 248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder); 249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint); 250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round); 251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit) 252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin) 253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh) 254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt) 255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan) 256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh) 257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma) 258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc); 259 260 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1 261 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2 262 263 // Overloads for functions that don't match the patterns expected by 264 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}. 265 template <typename __T1, typename __T2, typename __T3> 266 __DEVICE__ typename __clang_cuda_enable_if< 267 std::numeric_limits<__T1>::is_specialized && 268 std::numeric_limits<__T2>::is_specialized && 269 std::numeric_limits<__T3>::is_specialized, 270 double>::type 271 fma(__T1 __x, __T2 __y, __T3 __z) { 272 return std::fma((double)__x, (double)__y, (double)__z); 273 } 274 275 template <typename __T> 276 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 277 double>::type 278 frexp(__T __x, int *__exp) { 279 return std::frexp((double)__x, __exp); 280 } 281 282 template <typename __T> 283 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 284 double>::type 285 ldexp(__T __x, int __exp) { 286 return std::ldexp((double)__x, __exp); 287 } 288 289 template <typename __T1, typename __T2> 290 __DEVICE__ typename __clang_cuda_enable_if< 291 std::numeric_limits<__T1>::is_specialized && 292 std::numeric_limits<__T2>::is_specialized, 293 double>::type 294 remquo(__T1 __x, __T2 __y, int *__quo) { 295 return std::remquo((double)__x, (double)__y, __quo); 296 } 297 298 template <typename __T> 299 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 300 double>::type 301 scalbln(__T __x, long __exp) { 302 return std::scalbln((double)__x, __exp); 303 } 304 305 template <typename __T> 306 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer, 307 double>::type 308 scalbn(__T __x, int __exp) { 309 return std::scalbn((double)__x, __exp); 310 } 311 312 // We need to define these overloads in exactly the namespace our standard 313 // library uses (including the right inline namespace), otherwise they won't be 314 // picked up by other functions in the standard library (e.g. functions in 315 // <complex>). Thus the ugliness below. 316 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD 317 _LIBCPP_BEGIN_NAMESPACE_STD 318 #else 319 namespace std { 320 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 321 _GLIBCXX_BEGIN_NAMESPACE_VERSION 322 #endif 323 #endif 324 325 // Pull the new overloads we defined above into namespace std. 326 using ::acos; 327 using ::acosh; 328 using ::asin; 329 using ::asinh; 330 using ::atan; 331 using ::atan2; 332 using ::atanh; 333 using ::cbrt; 334 using ::ceil; 335 using ::copysign; 336 using ::cos; 337 using ::cosh; 338 using ::erf; 339 using ::erfc; 340 using ::exp; 341 using ::exp2; 342 using ::expm1; 343 using ::fabs; 344 using ::fdim; 345 using ::floor; 346 using ::fma; 347 using ::fmax; 348 using ::fmin; 349 using ::fmod; 350 using ::fpclassify; 351 using ::frexp; 352 using ::hypot; 353 using ::ilogb; 354 using ::isfinite; 355 using ::isgreater; 356 using ::isgreaterequal; 357 using ::isless; 358 using ::islessequal; 359 using ::islessgreater; 360 using ::isnormal; 361 using ::isunordered; 362 using ::ldexp; 363 using ::lgamma; 364 using ::llrint; 365 using ::llround; 366 using ::log; 367 using ::log10; 368 using ::log1p; 369 using ::log2; 370 using ::logb; 371 using ::lrint; 372 using ::lround; 373 using ::nearbyint; 374 using ::nextafter; 375 using ::pow; 376 using ::remainder; 377 using ::remquo; 378 using ::rint; 379 using ::round; 380 using ::scalbln; 381 using ::scalbn; 382 using ::signbit; 383 using ::sin; 384 using ::sinh; 385 using ::sqrt; 386 using ::tan; 387 using ::tanh; 388 using ::tgamma; 389 using ::trunc; 390 391 // Well this is fun: We need to pull these symbols in for libc++, but we can't 392 // pull them in with libstdc++, because its ::isinf and ::isnan are different 393 // than its std::isinf and std::isnan. 394 #ifndef __GLIBCXX__ 395 using ::isinf; 396 using ::isnan; 397 #endif 398 399 // Finally, pull the "foobarf" functions that CUDA defines in its headers into 400 // namespace std. 401 using ::acosf; 402 using ::acoshf; 403 using ::asinf; 404 using ::asinhf; 405 using ::atan2f; 406 using ::atanf; 407 using ::atanhf; 408 using ::cbrtf; 409 using ::ceilf; 410 using ::copysignf; 411 using ::cosf; 412 using ::coshf; 413 using ::erfcf; 414 using ::erff; 415 using ::exp2f; 416 using ::expf; 417 using ::expm1f; 418 using ::fabsf; 419 using ::fdimf; 420 using ::floorf; 421 using ::fmaf; 422 using ::fmaxf; 423 using ::fminf; 424 using ::fmodf; 425 using ::frexpf; 426 using ::hypotf; 427 using ::ilogbf; 428 using ::ldexpf; 429 using ::lgammaf; 430 using ::llrintf; 431 using ::llroundf; 432 using ::log10f; 433 using ::log1pf; 434 using ::log2f; 435 using ::logbf; 436 using ::logf; 437 using ::lrintf; 438 using ::lroundf; 439 using ::modff; 440 using ::nearbyintf; 441 using ::nextafterf; 442 using ::powf; 443 using ::remainderf; 444 using ::remquof; 445 using ::rintf; 446 using ::roundf; 447 using ::scalblnf; 448 using ::scalbnf; 449 using ::sinf; 450 using ::sinhf; 451 using ::sqrtf; 452 using ::tanf; 453 using ::tanhf; 454 using ::tgammaf; 455 using ::truncf; 456 457 #ifdef _LIBCPP_END_NAMESPACE_STD 458 _LIBCPP_END_NAMESPACE_STD 459 #else 460 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION 461 _GLIBCXX_END_NAMESPACE_VERSION 462 #endif 463 } // namespace std 464 #endif 465 466 #endif // __OPENMP_NVPTX__ 467 468 #undef __DEVICE__ 469 470 #endif 471