xref: /freebsd/contrib/llvm-project/clang/lib/Headers/__clang_cuda_cmath.h (revision 59144db3fca192c4637637dfe6b5a5d98632cd47)
1 /*===---- __clang_cuda_cmath.h - Device-side CUDA cmath support ------------===
2  *
3  * Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4  * See https://llvm.org/LICENSE.txt for license information.
5  * SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6  *
7  *===-----------------------------------------------------------------------===
8  */
9 #ifndef __CLANG_CUDA_CMATH_H__
10 #define __CLANG_CUDA_CMATH_H__
11 #ifndef __CUDA__
12 #error "This file is for CUDA compilation only."
13 #endif
14 
15 #ifndef __OPENMP_NVPTX__
16 #include <limits>
17 #endif
18 
19 // CUDA lets us use various std math functions on the device side.  This file
20 // works in concert with __clang_cuda_math_forward_declares.h to make this work.
21 //
22 // Specifically, the forward-declares header declares __device__ overloads for
23 // these functions in the global namespace, then pulls them into namespace std
24 // with 'using' statements.  Then this file implements those functions, after
25 // their implementations have been pulled in.
26 //
27 // It's important that we declare the functions in the global namespace and pull
28 // them into namespace std with using statements, as opposed to simply declaring
29 // these functions in namespace std, because our device functions need to
30 // overload the standard library functions, which may be declared in the global
31 // namespace or in std, depending on the degree of conformance of the stdlib
32 // implementation.  Declaring in the global namespace and pulling into namespace
33 // std covers all of the known knowns.
34 
35 #ifdef __OPENMP_NVPTX__
36 #define __DEVICE__ static constexpr __attribute__((always_inline, nothrow))
37 #else
38 #define __DEVICE__ static __device__ __inline__ __attribute__((always_inline))
39 #endif
40 
41 __DEVICE__ long long abs(long long __n) { return ::llabs(__n); }
42 __DEVICE__ long abs(long __n) { return ::labs(__n); }
43 __DEVICE__ float abs(float __x) { return ::fabsf(__x); }
44 __DEVICE__ double abs(double __x) { return ::fabs(__x); }
45 __DEVICE__ float acos(float __x) { return ::acosf(__x); }
46 __DEVICE__ float asin(float __x) { return ::asinf(__x); }
47 __DEVICE__ float atan(float __x) { return ::atanf(__x); }
48 __DEVICE__ float atan2(float __x, float __y) { return ::atan2f(__x, __y); }
49 __DEVICE__ float ceil(float __x) { return ::ceilf(__x); }
50 __DEVICE__ float cos(float __x) { return ::cosf(__x); }
51 __DEVICE__ float cosh(float __x) { return ::coshf(__x); }
52 __DEVICE__ float exp(float __x) { return ::expf(__x); }
53 __DEVICE__ float fabs(float __x) { return ::fabsf(__x); }
54 __DEVICE__ float floor(float __x) { return ::floorf(__x); }
55 __DEVICE__ float fmod(float __x, float __y) { return ::fmodf(__x, __y); }
56 __DEVICE__ int fpclassify(float __x) {
57   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
58                               FP_ZERO, __x);
59 }
60 __DEVICE__ int fpclassify(double __x) {
61   return __builtin_fpclassify(FP_NAN, FP_INFINITE, FP_NORMAL, FP_SUBNORMAL,
62                               FP_ZERO, __x);
63 }
64 __DEVICE__ float frexp(float __arg, int *__exp) {
65   return ::frexpf(__arg, __exp);
66 }
67 
68 // For inscrutable reasons, the CUDA headers define these functions for us on
69 // Windows.
70 #if !defined(_MSC_VER) || defined(__OPENMP_NVPTX__)
71 
72 // For OpenMP we work around some old system headers that have non-conforming
73 // `isinf(float)` and `isnan(float)` implementations that return an `int`. We do
74 // this by providing two versions of these functions, differing only in the
75 // return type. To avoid conflicting definitions we disable implicit base
76 // function generation. That means we will end up with two specializations, one
77 // per type, but only one has a base function defined by the system header.
78 #if defined(__OPENMP_NVPTX__)
79 #pragma omp begin declare variant match(                                       \
80     implementation = {extension(disable_implicit_base)})
81 
82 // FIXME: We lack an extension to customize the mangling of the variants, e.g.,
83 //        add a suffix. This means we would clash with the names of the variants
84 //        (note that we do not create implicit base functions here). To avoid
85 //        this clash we add a new trait to some of them that is always true
86 //        (this is LLVM after all ;)). It will only influence the mangled name
87 //        of the variants inside the inner region and avoid the clash.
88 #pragma omp begin declare variant match(implementation = {vendor(llvm)})
89 
90 __DEVICE__ int isinf(float __x) { return ::__isinff(__x); }
91 __DEVICE__ int isinf(double __x) { return ::__isinf(__x); }
92 __DEVICE__ int isfinite(float __x) { return ::__finitef(__x); }
93 __DEVICE__ int isfinite(double __x) { return ::__isfinited(__x); }
94 __DEVICE__ int isnan(float __x) { return ::__isnanf(__x); }
95 __DEVICE__ int isnan(double __x) { return ::__isnan(__x); }
96 
97 #pragma omp end declare variant
98 
99 #endif
100 
101 __DEVICE__ bool isinf(float __x) { return ::__isinff(__x); }
102 __DEVICE__ bool isinf(double __x) { return ::__isinf(__x); }
103 __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); }
104 // For inscrutable reasons, __finite(), the double-precision version of
105 // __finitef, does not exist when compiling for MacOS.  __isfinited is available
106 // everywhere and is just as good.
107 __DEVICE__ bool isfinite(double __x) { return ::__isfinited(__x); }
108 __DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); }
109 __DEVICE__ bool isnan(double __x) { return ::__isnan(__x); }
110 
111 #if defined(__OPENMP_NVPTX__)
112 #pragma omp end declare variant
113 #endif
114 
115 #endif
116 
117 __DEVICE__ bool isgreater(float __x, float __y) {
118   return __builtin_isgreater(__x, __y);
119 }
120 __DEVICE__ bool isgreater(double __x, double __y) {
121   return __builtin_isgreater(__x, __y);
122 }
123 __DEVICE__ bool isgreaterequal(float __x, float __y) {
124   return __builtin_isgreaterequal(__x, __y);
125 }
126 __DEVICE__ bool isgreaterequal(double __x, double __y) {
127   return __builtin_isgreaterequal(__x, __y);
128 }
129 __DEVICE__ bool isless(float __x, float __y) {
130   return __builtin_isless(__x, __y);
131 }
132 __DEVICE__ bool isless(double __x, double __y) {
133   return __builtin_isless(__x, __y);
134 }
135 __DEVICE__ bool islessequal(float __x, float __y) {
136   return __builtin_islessequal(__x, __y);
137 }
138 __DEVICE__ bool islessequal(double __x, double __y) {
139   return __builtin_islessequal(__x, __y);
140 }
141 __DEVICE__ bool islessgreater(float __x, float __y) {
142   return __builtin_islessgreater(__x, __y);
143 }
144 __DEVICE__ bool islessgreater(double __x, double __y) {
145   return __builtin_islessgreater(__x, __y);
146 }
147 __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); }
148 __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); }
149 __DEVICE__ bool isunordered(float __x, float __y) {
150   return __builtin_isunordered(__x, __y);
151 }
152 __DEVICE__ bool isunordered(double __x, double __y) {
153   return __builtin_isunordered(__x, __y);
154 }
155 __DEVICE__ float ldexp(float __arg, int __exp) {
156   return ::ldexpf(__arg, __exp);
157 }
158 __DEVICE__ float log(float __x) { return ::logf(__x); }
159 __DEVICE__ float log10(float __x) { return ::log10f(__x); }
160 __DEVICE__ float modf(float __x, float *__iptr) { return ::modff(__x, __iptr); }
161 __DEVICE__ float pow(float __base, float __exp) {
162   return ::powf(__base, __exp);
163 }
164 __DEVICE__ float pow(float __base, int __iexp) {
165   return ::powif(__base, __iexp);
166 }
167 __DEVICE__ double pow(double __base, int __iexp) {
168   return ::powi(__base, __iexp);
169 }
170 __DEVICE__ bool signbit(float __x) { return ::__signbitf(__x); }
171 __DEVICE__ bool signbit(double __x) { return ::__signbitd(__x); }
172 __DEVICE__ float sin(float __x) { return ::sinf(__x); }
173 __DEVICE__ float sinh(float __x) { return ::sinhf(__x); }
174 __DEVICE__ float sqrt(float __x) { return ::sqrtf(__x); }
175 __DEVICE__ float tan(float __x) { return ::tanf(__x); }
176 __DEVICE__ float tanh(float __x) { return ::tanhf(__x); }
177 
178 // There was a redefinition error for this this overload in CUDA mode.
179 // We restrict it to OpenMP mode for now, that is where it is actually needed
180 // anyway.
181 #ifdef __OPENMP_NVPTX__
182 __DEVICE__ float remquo(float __n, float __d, int *__q) {
183   return ::remquof(__n, __d, __q);
184 }
185 #endif
186 
187 // Notably missing above is nexttoward.  We omit it because
188 // libdevice doesn't provide an implementation, and we don't want to be in the
189 // business of implementing tricky libm functions in this header.
190 
191 #ifndef __OPENMP_NVPTX__
192 
193 // Now we've defined everything we promised we'd define in
194 // __clang_cuda_math_forward_declares.h.  We need to do two additional things to
195 // fix up our math functions.
196 //
197 // 1) Define __device__ overloads for e.g. sin(int).  The CUDA headers define
198 //    only sin(float) and sin(double), which means that e.g. sin(0) is
199 //    ambiguous.
200 //
201 // 2) Pull the __device__ overloads of "foobarf" math functions into namespace
202 //    std.  These are defined in the CUDA headers in the global namespace,
203 //    independent of everything else we've done here.
204 
205 // We can't use std::enable_if, because we want to be pre-C++11 compatible.  But
206 // we go ahead and unconditionally define functions that are only available when
207 // compiling for C++11 to match the behavior of the CUDA headers.
208 template<bool __B, class __T = void>
209 struct __clang_cuda_enable_if {};
210 
211 template <class __T> struct __clang_cuda_enable_if<true, __T> {
212   typedef __T type;
213 };
214 
215 // Defines an overload of __fn that accepts one integral argument, calls
216 // __fn((double)x), and returns __retty.
217 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(__retty, __fn)                      \
218   template <typename __T>                                                      \
219   __DEVICE__                                                                   \
220       typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,    \
221                                       __retty>::type                           \
222       __fn(__T __x) {                                                          \
223     return ::__fn((double)__x);                                                \
224   }
225 
226 // Defines an overload of __fn that accepts one two arithmetic arguments, calls
227 // __fn((double)x, (double)y), and returns a double.
228 //
229 // Note this is different from OVERLOAD_1, which generates an overload that
230 // accepts only *integral* arguments.
231 #define __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(__retty, __fn)                      \
232   template <typename __T1, typename __T2>                                      \
233   __DEVICE__ typename __clang_cuda_enable_if<                                  \
234       std::numeric_limits<__T1>::is_specialized &&                             \
235           std::numeric_limits<__T2>::is_specialized,                           \
236       __retty>::type                                                           \
237   __fn(__T1 __x, __T2 __y) {                                                   \
238     return __fn((double)__x, (double)__y);                                     \
239   }
240 
241 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acos)
242 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, acosh)
243 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asin)
244 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, asinh)
245 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atan)
246 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, atan2);
247 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, atanh)
248 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cbrt)
249 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, ceil)
250 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, copysign);
251 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cos)
252 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, cosh)
253 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erf)
254 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, erfc)
255 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp)
256 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, exp2)
257 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, expm1)
258 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, fabs)
259 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fdim);
260 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, floor)
261 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmax);
262 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmin);
263 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, fmod);
264 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, fpclassify)
265 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, hypot);
266 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(int, ilogb)
267 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isfinite)
268 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreater);
269 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isgreaterequal);
270 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isinf);
271 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isless);
272 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessequal);
273 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, islessgreater);
274 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnan);
275 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, isnormal)
276 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(bool, isunordered);
277 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, lgamma)
278 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log)
279 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log10)
280 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log1p)
281 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, log2)
282 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, logb)
283 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llrint)
284 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long long, llround)
285 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lrint)
286 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(long, lround)
287 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, nearbyint);
288 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, nextafter);
289 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, pow);
290 __CUDA_CLANG_FN_INTEGER_OVERLOAD_2(double, remainder);
291 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, rint);
292 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, round);
293 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(bool, signbit)
294 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sin)
295 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sinh)
296 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, sqrt)
297 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tan)
298 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tanh)
299 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, tgamma)
300 __CUDA_CLANG_FN_INTEGER_OVERLOAD_1(double, trunc);
301 
302 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_1
303 #undef __CUDA_CLANG_FN_INTEGER_OVERLOAD_2
304 
305 // Overloads for functions that don't match the patterns expected by
306 // __CUDA_CLANG_FN_INTEGER_OVERLOAD_{1,2}.
307 template <typename __T1, typename __T2, typename __T3>
308 __DEVICE__ typename __clang_cuda_enable_if<
309     std::numeric_limits<__T1>::is_specialized &&
310         std::numeric_limits<__T2>::is_specialized &&
311         std::numeric_limits<__T3>::is_specialized,
312     double>::type
313 fma(__T1 __x, __T2 __y, __T3 __z) {
314   return std::fma((double)__x, (double)__y, (double)__z);
315 }
316 
317 template <typename __T>
318 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
319                                            double>::type
320 frexp(__T __x, int *__exp) {
321   return std::frexp((double)__x, __exp);
322 }
323 
324 template <typename __T>
325 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
326                                            double>::type
327 ldexp(__T __x, int __exp) {
328   return std::ldexp((double)__x, __exp);
329 }
330 
331 template <typename __T1, typename __T2>
332 __DEVICE__ typename __clang_cuda_enable_if<
333     std::numeric_limits<__T1>::is_specialized &&
334         std::numeric_limits<__T2>::is_specialized,
335     double>::type
336 remquo(__T1 __x, __T2 __y, int *__quo) {
337   return std::remquo((double)__x, (double)__y, __quo);
338 }
339 
340 template <typename __T>
341 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
342                                            double>::type
343 scalbln(__T __x, long __exp) {
344   return std::scalbln((double)__x, __exp);
345 }
346 
347 template <typename __T>
348 __DEVICE__ typename __clang_cuda_enable_if<std::numeric_limits<__T>::is_integer,
349                                            double>::type
350 scalbn(__T __x, int __exp) {
351   return std::scalbn((double)__x, __exp);
352 }
353 
354 // We need to define these overloads in exactly the namespace our standard
355 // library uses (including the right inline namespace), otherwise they won't be
356 // picked up by other functions in the standard library (e.g. functions in
357 // <complex>).  Thus the ugliness below.
358 #ifdef _LIBCPP_BEGIN_NAMESPACE_STD
359 _LIBCPP_BEGIN_NAMESPACE_STD
360 #else
361 namespace std {
362 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
363 _GLIBCXX_BEGIN_NAMESPACE_VERSION
364 #endif
365 #endif
366 
367 // Pull the new overloads we defined above into namespace std.
368 using ::acos;
369 using ::acosh;
370 using ::asin;
371 using ::asinh;
372 using ::atan;
373 using ::atan2;
374 using ::atanh;
375 using ::cbrt;
376 using ::ceil;
377 using ::copysign;
378 using ::cos;
379 using ::cosh;
380 using ::erf;
381 using ::erfc;
382 using ::exp;
383 using ::exp2;
384 using ::expm1;
385 using ::fabs;
386 using ::fdim;
387 using ::floor;
388 using ::fma;
389 using ::fmax;
390 using ::fmin;
391 using ::fmod;
392 using ::fpclassify;
393 using ::frexp;
394 using ::hypot;
395 using ::ilogb;
396 using ::isfinite;
397 using ::isgreater;
398 using ::isgreaterequal;
399 using ::isless;
400 using ::islessequal;
401 using ::islessgreater;
402 using ::isnormal;
403 using ::isunordered;
404 using ::ldexp;
405 using ::lgamma;
406 using ::llrint;
407 using ::llround;
408 using ::log;
409 using ::log10;
410 using ::log1p;
411 using ::log2;
412 using ::logb;
413 using ::lrint;
414 using ::lround;
415 using ::nearbyint;
416 using ::nextafter;
417 using ::pow;
418 using ::remainder;
419 using ::remquo;
420 using ::rint;
421 using ::round;
422 using ::scalbln;
423 using ::scalbn;
424 using ::signbit;
425 using ::sin;
426 using ::sinh;
427 using ::sqrt;
428 using ::tan;
429 using ::tanh;
430 using ::tgamma;
431 using ::trunc;
432 
433 // Well this is fun: We need to pull these symbols in for libc++, but we can't
434 // pull them in with libstdc++, because its ::isinf and ::isnan are different
435 // than its std::isinf and std::isnan.
436 #ifndef __GLIBCXX__
437 using ::isinf;
438 using ::isnan;
439 #endif
440 
441 // Finally, pull the "foobarf" functions that CUDA defines in its headers into
442 // namespace std.
443 using ::acosf;
444 using ::acoshf;
445 using ::asinf;
446 using ::asinhf;
447 using ::atan2f;
448 using ::atanf;
449 using ::atanhf;
450 using ::cbrtf;
451 using ::ceilf;
452 using ::copysignf;
453 using ::cosf;
454 using ::coshf;
455 using ::erfcf;
456 using ::erff;
457 using ::exp2f;
458 using ::expf;
459 using ::expm1f;
460 using ::fabsf;
461 using ::fdimf;
462 using ::floorf;
463 using ::fmaf;
464 using ::fmaxf;
465 using ::fminf;
466 using ::fmodf;
467 using ::frexpf;
468 using ::hypotf;
469 using ::ilogbf;
470 using ::ldexpf;
471 using ::lgammaf;
472 using ::llrintf;
473 using ::llroundf;
474 using ::log10f;
475 using ::log1pf;
476 using ::log2f;
477 using ::logbf;
478 using ::logf;
479 using ::lrintf;
480 using ::lroundf;
481 using ::modff;
482 using ::nearbyintf;
483 using ::nextafterf;
484 using ::powf;
485 using ::remainderf;
486 using ::remquof;
487 using ::rintf;
488 using ::roundf;
489 using ::scalblnf;
490 using ::scalbnf;
491 using ::sinf;
492 using ::sinhf;
493 using ::sqrtf;
494 using ::tanf;
495 using ::tanhf;
496 using ::tgammaf;
497 using ::truncf;
498 
499 #ifdef _LIBCPP_END_NAMESPACE_STD
500 _LIBCPP_END_NAMESPACE_STD
501 #else
502 #ifdef _GLIBCXX_BEGIN_NAMESPACE_VERSION
503 _GLIBCXX_END_NAMESPACE_VERSION
504 #endif
505 } // namespace std
506 #endif
507 
508 #endif // __OPENMP_NVPTX__
509 
510 #undef __DEVICE__
511 
512 #endif
513